In [10]:
import pandas as pd
import numpy as np
import os

In [11]:
# Raw inputs, all read from the local "data" directory.
# The two comma-separated files are loaded first.
votes = pd.read_csv(os.path.join("data", "votes.csv"))
contestants = pd.read_csv(os.path.join("data", "contestants.csv"))

# This file is read with ";" as the field separator instead of the default.
cultural = pd.read_csv(os.path.join("data", "cultural_similarity.csv"), sep=";")

In [12]:
# Keep only the rows whose "year" is 2000 or later.
votes = votes.loc[votes["year"].ge(2000)]

In [13]:
# Build a lookup from each country's short id to its full country name, both as
# a dictionary (country_dict) and as a two-column dataframe (df).
countries = contestants["to_country"].unique()

# Every distinct id recorded for each country, gathered in a single pass over
# the data instead of re-filtering the dataframe once per country.
# sort=False keeps the countries in order of first appearance, which is the
# same order as `countries`.
ids_by_country = contestants.groupby("to_country", sort=False)["to_country_id"].unique()

country_dict = {}
mapping_rows = []

for country, candidate_ids in ids_by_country.items():
    # if a country has several ids, choose the shortest one
    short_name = min(candidate_ids, key=len)

    # add to dictionary
    country_dict[short_name] = country

    # remember the row; the dataframe is assembled once after the loop
    mapping_rows.append({"short_name": short_name, "country": country})

# Building the dataframe in one go avoids re-copying all previous rows on every
# iteration (as pd.concat inside the loop would) and gives a clean 0..n-1 index.
df = pd.DataFrame(mapping_rows, columns=["short_name", "country"])

# save pandas dataframe as csv
#df.to_csv(os.path.join("data", "shortname_mapping.csv"), index=False)

In [14]:
# Show the short-name -> full-name mapping.
country_dict

{'ch': 'Switzerland',
 'nl': 'Netherlands',
 'be': 'Belgium',
 'de': 'Germany',
 'fr': 'France',
 'lu': 'Luxembourg',
 'it': 'Italy',
 'dk': 'Denmark',
 'gb': 'United Kingdom',
 'at': 'Austria',
 'se': 'Sweden',
 'mc': 'Monaco',
 'no': 'Norway',
 'yu': 'Yugoslavia',
 'es': 'Spain',
 'fi': 'Finland',
 'pt': 'Portugal',
 'ie': 'Ireland',
 'mt': 'Malta',
 'il': 'Israel',
 'gr': 'Greece',
 'tr': 'Turkey',
 'ma': 'Morocco',
 'cy': 'Cyprus',
 'is': 'Iceland',
 'hr': 'Croatia',
 'ba': 'Bosnia & Herzegovina',
 'si': 'Slovenia',
 'pl': 'Poland',
 'hu': 'Hungary',
 'ru': 'Russia',
 'sk': 'Slovakia',
 'ro': 'Romania',
 'ee': 'Estonia',
 'lt': 'Lithuania',
 'mk': 'North Macedonia',
 'lv': 'Latvia',
 'ua': 'Ukraine',
 'cs': 'Serbia & Montenegro',
 'al': 'Albania',
 'Andorra': 'Andorra',
 'by': 'Belarus',
 'md': 'Moldova',
 'bg': 'Bulgaria',
 'am': 'Armenia',
 'rs': 'Serbia',
 'ge': 'Georgia',
 'me': 'Montenegro',
 'cz': 'Czech Republic',
 'az': 'Azerbaijan',
 'sm': 'San Marino',
 'au': 'Australia'}

In [15]:
# Restrict the cultural similarity dataframe to pairs that involve a contest country.
# A pair is kept when its "country1" OR its "country2" is in `countries`.
# NOTE(review): this means the other side of a kept pair may be a country that
# never took part in the contest -- confirm that this is intended.
cultural_1 = cultural.loc[cultural["country1"].isin(countries)]
cultural_2 = cultural.loc[cultural["country2"].isin(countries)]

# Stack both selections (country1 matches first, then country2 matches) and
# remove the rows that were selected twice.
cultural = pd.concat([cultural_1, cultural_2]).drop_duplicates()

In [16]:
def get_key(val, my_dict):
    """Return the first key of ``my_dict`` whose value equals ``val``.

    Keys are checked in the dictionary's iteration order. If no value matches,
    the string "key doesn't exist" is returned instead of raising an error.
    """
    matching_keys = (key for key, value in my_dict.items() if val == value)
    return next(matching_keys, "key doesn't exist")

def points_neighbours(input_country, list_of_neighbours, data):
    """Sum the points ``input_country`` received from the given neighbours.

    Parameters
    ----------
    input_country
        Value matched against the "to_country_id" column (the receiver).
    list_of_neighbours
        Values matched against the "from_country_id" column (the givers).
    data
        Dataframe with the columns "to_country_id", "from_country_id" and
        "total_points".

    Returns
    -------
    The sum of "total_points" over all rows where the receiver is
    ``input_country`` and the giver is in ``list_of_neighbours``.
    """
    is_receiver = data["to_country_id"] == input_country
    is_from_neighbour = data["from_country_id"].isin(list_of_neighbours)

    return data.loc[is_receiver & is_from_neighbour, "total_points"].sum()

# Create cultural data

In [17]:
# For every country, find the countries it is paired with in its 5
# highest-"index" rows of `cultural` and sum the points those countries gave it.
#
# One record per country is collected and the dataframe is built once after the
# loop; growing a dataframe with pd.concat inside the loop re-copies all
# previous rows on every iteration.
records = []

# loop over dictionary
for shortname, longname in country_dict.items():
    # the country can appear in either column of a pair, so select both sides
    as_country2 = cultural[cultural["country2"] == longname].sort_values(by="index", ascending=False)
    as_country1 = cultural[cultural["country1"] == longname].sort_values(by="index", ascending=False)

    # keep the 5 pairs with the highest "index" value (treated as the most similar)
    top_pairs = pd.concat([as_country2, as_country1]).sort_values(by="index", ascending=False).head(5)

    # every country named in those pairs ...
    paired_countries = np.concatenate([top_pairs["country1"].unique(), top_pairs["country2"].unique()])

    # ... except the input country itself (long names)
    list_of_neighbours_ln = [country for country in paired_countries if country != longname]

    # short names for the neighbours; get_key yields "key doesn't exist" for a
    # long name that is not a value of country_dict
    list_of_neighbours = [get_key(country, country_dict) for country in list_of_neighbours_ln]

    # get the points given to the input country by the neighbours
    points = points_neighbours(shortname, list_of_neighbours, votes)

    records.append({
        "country": longname,
        "country_sn": shortname,
        "countries_similar": list_of_neighbours,
        "countries_similar_ln": list_of_neighbours_ln,
        "points_culturally_similar": points,
    })

# Explicit columns keep the layout stable even when there are no records.
df = pd.DataFrame(
    records,
    columns=["country", "country_sn", "countries_similar", "countries_similar_ln", "points_culturally_similar"],
)

In [18]:
# save pandas dataframe as csv
# only those that do not have an empty list
# "countries_similar" holds Python lists, so a comparison against the string
# "[]" is True for every row and would drop nothing; check the list length.
has_neighbours = df["countries_similar"].map(len) > 0

df[has_neighbours].to_csv(os.path.join("data", "votes_cultural_similarity.csv"), index=False)