In [1]:
import os
from pathlib import Path

import pandas as pd

from code.fasttext.embedding_utils import TableEncoder, compare_embeddings_of, show_most_similar_rows

## Computing row embeddings - only known categorical columns

In [2]:
# Encoder instance that is passed to both compare_embeddings_of calls below.
# NOTE(review): TableEncoder() is built with its defaults; which embedding
# model it loads is not visible here -- presumably a fastText model, going by
# the module path. Confirm in embedding_utils.
tabenc = TableEncoder()

In [3]:
# Directory holding the uk_football CSV exports. The default is the location
# used when this notebook was written; set UK_FOOTBALL_DATA_DIR to run the
# notebook on another machine without editing this cell.
DATA_DIR = Path(os.environ.get(
    'UK_FOOTBALL_DATA_DIR',
    '/home/giovanni/unimore/TESI/src/data/uk_football',
))

# Stadium list restricted to the columns used in this section. The rank, image
# and capacity columns are removed first; dropna() then discards every row that
# still has a missing value, and ignore_index=True renumbers the survivors
# from 0.
stadium_df = (
    pd.read_csv(DATA_DIR / 'List_of_football_stadiums_in_England_1.csv')
    .drop(columns=['Rank\n(England only)', 'Image', 'Capacity'])
    .dropna(ignore_index=True)
)

# Premier League table without its capacity column, cleaned the same way.
pl1_df = (
    pd.read_csv(DATA_DIR / 'Premier_League_1.csv')
    .drop(columns='Capacity')
    .dropna(ignore_index=True)
)

In [4]:
# Preview the first five stadium rows to check which columns are in play.
stadium_df.head(n=5)

Unnamed: 0,Stadium,Town / City,Team,League
0,Old Trafford,"Old Trafford, Greater Manchester",Manchester United,Premier League
1,Tottenham Hotspur Stadium,"Tottenham, London",Tottenham Hotspur,Premier League
2,London Stadium,"Stratford, London",West Ham United,Premier League
3,Anfield,"Anfield, Liverpool",Liverpool,Premier League
4,Emirates Stadium,"Holloway, London",Arsenal,Premier League


In [5]:
# Preview the first five Premier League rows to check which columns are in play.
pl1_df.head(n=5)

Unnamed: 0,Team,Location,Stadium
0,Arsenal,London (Holloway),Emirates Stadium
1,Aston Villa,Birmingham,Villa Park
2,Bournemouth,Bournemouth,Vitality Stadium
3,Brentford,London (Brentford),Gtech Community Stadium
4,Brighton & Hove Albion,Brighton,American Express Stadium


### No label input in embedding creation

In [6]:
# Pick one example row by index label. The frame was loaded with
# dropna(ignore_index=True), so labels run from 0 and label 1 is the second row.
row = stadium_df.loc[1]
# Bare last expression so the notebook displays the selected Series.
row

Stadium        Tottenham Hotspur Stadium
Town / City            Tottenham, London
Team                   Tottenham Hotspur
League                    Premier League
Name: 1, dtype: object

In [7]:
# Print the first three stadium rows, one cell value per line, followed by a
# blank line after each row.
# NOTE: the loop variable is called `row`, so running this cell rebinds the
# `row` assigned in the previous cell.
for row_label, row in stadium_df.iloc[:3].iterrows():
    for value in row:
        print(value)
    print()

Old Trafford
Old Trafford, Greater Manchester
Manchester United
Premier League

Tottenham Hotspur Stadium
Tottenham, London
Tottenham Hotspur
Premier League

London Stadium
Stratford, London
West Ham United
Premier League



In [8]:
# Compare the rows of the two frames using the `tabenc` encoder.
# NOTE(review): the trailing positional flags (False, True) are opaque at the
# call site; presumably the first one disables label (column-name) input, as
# this section's heading suggests -- confirm against compare_embeddings_of and
# pass them by keyword.
comparison_rows = compare_embeddings_of(stadium_df, pl1_df, tabenc, False, True)

In [9]:
# Show the best-matching row pairs. Judging by the recorded output, each entry
# lists a score followed by the matched stadium_df row and pl1_df row, each
# prefixed with its index.
# NOTE(review): the score is presumably a cosine similarity -- confirm.
show_most_similar_rows(comparison_rows, stadium_df, pl1_df)

#0: 0.9979841291826658
	19: Bramall Lane Sheffield Sheffield United Premier League
	16: Sheffield United Sheffield Bramall Lane

#1: 0.997338704988714
	3: Anfield Anfield, Liverpool Liverpool Premier League
	10: Liverpool Liverpool (Anfield) Anfield

#2: 0.9971854556226278
	49: Turf Moor Burnley Burnley Premier League
	5: Burnley Burnley Turf Moor

#3: 0.9968611565221516
	9: Stamford Bridge Fulham, London Chelsea Premier League
	6: Chelsea London (Fulham) Stamford Bridge

#4: 0.9965761043608271
	6: St James' Park Newcastle upon Tyne Newcastle United Premier League
	14: Newcastle United Newcastle upon Tyne St James' Park



## Comparing row embeddings with complete dataframes

In [10]:
# Directory holding the uk_football CSV exports, resolved inside this cell so
# the section can be re-run without relying on earlier cells. The default is
# the location used when this notebook was written; set UK_FOOTBALL_DATA_DIR to
# point somewhere else.
DATA_DIR = Path(os.environ.get(
    'UK_FOOTBALL_DATA_DIR',
    '/home/giovanni/unimore/TESI/src/data/uk_football',
))

# NOTE: these assignments rebind stadium_df / pl1_df. From here on the names
# hold the CSVs exactly as read -- no columns dropped, no rows with missing
# values removed -- so re-running earlier cells after this one would show
# different data.
stadium_df = pd.read_csv(DATA_DIR / 'List_of_football_stadiums_in_England_1.csv')

pl1_df = pd.read_csv(DATA_DIR / 'Premier_League_1.csv')

In [11]:
# Preview the first five rows of the stadium table as read from disk, with all
# of its columns.
stadium_df.head(n=5)

Unnamed: 0,Rank\n(England only),Stadium,Town / City,Capacity,Team,League,Image
0,1.0,Wembley Stadium,"Wembley, London",90000,"England (Men's, women's and youth)",,
1,2.0,Old Trafford,"Old Trafford, Greater Manchester",74031,Manchester United,Premier League,
2,3.0,Tottenham Hotspur Stadium,"Tottenham, London",62850,Tottenham Hotspur,Premier League,
3,4.0,London Stadium,"Stratford, London",62500,West Ham United,Premier League,
4,5.0,Anfield,"Anfield, Liverpool",61276,Liverpool,Premier League,


In [12]:
# Preview the first five rows of the Premier League table as read from disk,
# with all of its columns.
pl1_df.head(n=5)

Unnamed: 0,Team,Location,Stadium,Capacity
0,Arsenal,London (Holloway),Emirates Stadium,60704
1,Aston Villa,Birmingham,Villa Park,42657
2,Bournemouth,Bournemouth,Vitality Stadium,11307
3,Brentford,London (Brentford),Gtech Community Stadium,17250
4,Brighton & Hove Albion,Brighton,American Express Stadium,31876


In [13]:
# Repeat the row comparison on the frames reloaded in this section, which keep
# every column and any missing values.
# NOTE(review): the trailing positional flags (False, True) are the same opaque
# values used in the first comparison; their meaning is not visible at the call
# site -- confirm against compare_embeddings_of and pass them by keyword.
comparison_rows = compare_embeddings_of(stadium_df, pl1_df, tabenc, False, True)

In [14]:
# Show the best-matching row pairs for the full-column comparison. In the
# recorded output the stadium rows include a literal "nan" token, which appears
# to come from the empty Image column -- NOTE(review): confirm whether missing
# values should be fed to the encoder.
show_most_similar_rows(comparison_rows, stadium_df, pl1_df)

#0: 0.9970643808660449
	20: 20.0 Bramall Lane Sheffield 32,050 Sheffield United Premier League nan
	16: Sheffield United Sheffield Bramall Lane 32,050

#1: 0.996481330753703
	50: 44.0 Turf Moor Burnley 21,744 Burnley Premier League nan
	5: Burnley Burnley Turf Moor 21,944

#2: 0.9960149821092582
	25: 24.0 City Ground West Bridgford 30,404 Nottingham Forest Premier League nan
	15: Nottingham Forest West Bridgford City Ground 30,404

#3: 0.995936514528914
	4: 5.0 Anfield Anfield, Liverpool 61,276 Liverpool Premier League nan
	10: Liverpool Liverpool (Anfield) Anfield 61,276

#4: 0.9953896916495484
	10: 11.0 Stamford Bridge Fulham, London 40,173 Chelsea Premier League nan
	6: Chelsea London (Fulham) Stamford Bridge 40,173

