# Importing Python Libraries

In [1]:
import requests
import json
import pandas as pd
import sqlite3

In [2]:
conn = sqlite3.connect('database.sqlite')
cur = conn.cursor()

In [3]:
cur.execute("""SELECT Match_ID, TeamName, HomeTeam, AwayTeam, Date, Season, Div, 
                FTHG AS HomeTeamGoals, FTAG AS AwayTeamGoals, FTR AS Winners
                FROM Matches
                JOIN Teams_in_Matches
                USING (Match_ID)
                JOIN Unique_Teams
                USING (Unique_Team_ID)
                WHERE Season = 2011
                ;""")
df = pd.DataFrame(cur.fetchall())
df.columns = [x[0] for x in cur.description]
df.head()

Unnamed: 0,Match_ID,TeamName,HomeTeam,AwayTeam,Date,Season,Div,HomeTeamGoals,AwayTeamGoals,Winners
0,1092,Bayern Munich,Nurnberg,Bayern Munich,2012-03-31,2011,D1,0,1,A
1,1092,Nurnberg,Nurnberg,Bayern Munich,2012-03-31,2011,D1,0,1,A
2,1093,Bayern Munich,Stuttgart,Bayern Munich,2011-12-11,2011,D1,1,2,A
3,1093,Stuttgart,Stuttgart,Bayern Munich,2011-12-11,2011,D1,1,2,A
4,1094,Bayern Munich,Wolfsburg,Bayern Munich,2011-08-13,2011,D1,0,1,A


In [4]:
all_games = len(df)/2
non_German_div_index = df[df['Div'] == "E0" ].index
df.drop(non_German_div_index , inplace=True)
german_games = len(df)/2
print("{} games removed!".format(int(all_games - german_games)))

380 games removed!


In [5]:
for i, row in df.iterrows():
    if row.TeamName == row.AwayTeam:
        if row.Winners == 'A':
            df.loc[i, 'Result'] = 'W'
        if row.Winners == 'D':
            df.loc[i, 'Result'] = 'D'
        if row.Winners == 'H':
            df.loc[i, 'Result'] = 'L'
    else:
        if row.Winners == 'H':
            df.loc[i, 'Result'] = 'W'
        if row.Winners == 'D':
            df.loc[i, 'Result'] = 'D'
        if row.Winners == 'A':
            df.loc[i, 'Result'] = 'L'

In [6]:
df.head()

Unnamed: 0,Match_ID,TeamName,HomeTeam,AwayTeam,Date,Season,Div,HomeTeamGoals,AwayTeamGoals,Winners,Result
0,1092,Bayern Munich,Nurnberg,Bayern Munich,2012-03-31,2011,D1,0,1,A,W
1,1092,Nurnberg,Nurnberg,Bayern Munich,2012-03-31,2011,D1,0,1,A,L
2,1093,Bayern Munich,Stuttgart,Bayern Munich,2011-12-11,2011,D1,1,2,A,W
3,1093,Stuttgart,Stuttgart,Bayern Munich,2011-12-11,2011,D1,1,2,A,L
4,1094,Bayern Munich,Wolfsburg,Bayern Munich,2011-08-13,2011,D1,0,1,A,W


In [7]:
for i, row in df.iterrows():
    if row.TeamName == row.AwayTeam:
        df.loc[i, 'GoalsScored'] = df.loc[i, 'AwayTeamGoals']
    elif row.TeamName == row.HomeTeam:
        df.loc[i, 'GoalsScored'] = df.loc[i, 'HomeTeamGoals']

In [8]:
df.head()

Unnamed: 0,Match_ID,TeamName,HomeTeam,AwayTeam,Date,Season,Div,HomeTeamGoals,AwayTeamGoals,Winners,Result,GoalsScored
0,1092,Bayern Munich,Nurnberg,Bayern Munich,2012-03-31,2011,D1,0,1,A,W,1.0
1,1092,Nurnberg,Nurnberg,Bayern Munich,2012-03-31,2011,D1,0,1,A,L,0.0
2,1093,Bayern Munich,Stuttgart,Bayern Munich,2011-12-11,2011,D1,1,2,A,W,2.0
3,1093,Stuttgart,Stuttgart,Bayern Munich,2011-12-11,2011,D1,1,2,A,L,1.0
4,1094,Bayern Munich,Wolfsburg,Bayern Munich,2011-08-13,2011,D1,0,1,A,W,1.0


In [9]:
total_goals_scored = df.pivot_table(index = 'TeamName', aggfunc='sum')
total_goals_scored = total_goals_scored.rename(columns={"GoalsScored":"Total Goals Scored (2011 Season)"})
total_goals_scored = total_goals_scored.drop(columns = ['AwayTeamGoals','HomeTeamGoals','Match_ID','Season'])
total_goals_scored = total_goals_scored.reset_index()
total_goals_scored['Total Goals Scored (2011 Season)'].astype(int)
total_goals_scored.head()

Unnamed: 0,TeamName,Total Goals Scored (2011 Season)
0,Aachen,30.0
1,Augsburg,36.0
2,Bayern Munich,77.0
3,Bochum,41.0
4,Braunschweig,37.0


In [10]:
wins_and_losses = df.pivot_table(df, index = 'TeamName', columns=['Result'], aggfunc='count')
wins_and_losses = wins_and_losses.drop(columns = ['AwayTeam', 'AwayTeamGoals', 'Date', 'Div','HomeTeamGoals','Match_ID','Season','GoalsScored','HomeTeam'])
wins_and_losses.head()


Unnamed: 0_level_0,Winners,Winners,Winners
Result,D,L,W
TeamName,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Aachen,13,15,6
Augsburg,14,12,8
Bayern Munich,4,7,23
Bochum,7,17,10
Braunschweig,15,9,10


In [11]:
wins_and_losses.columns = [col[1] for col in wins_and_losses.columns]
wins_and_losses.reset_index(inplace=True)
wins_and_losses.head(1)

Unnamed: 0,TeamName,D,L,W
0,Aachen,13,15,6


In [106]:
team_data = pd.concat([total_goals_scored, wins_and_losses], axis=1)
team_data = team_data.loc[:,~team_data.columns.duplicated()] # the ~ inverts the boolean statement from team_data.columns.duplicated() because wewant to keep non-duplicates
team_data.head()

Unnamed: 0,TeamName,Total Goals Scored (2011 Season),D,L,W
0,Aachen,30.0,13,15,6
1,Augsburg,36.0,14,12,8
2,Bayern Munich,77.0,4,7,23
3,Bochum,41.0,7,17,10
4,Braunschweig,37.0,15,9,10


In [17]:
cur.execute("""SELECT *
                FROM Matches
                WHERE Season = 2011
                ;""")
df_2 = pd.DataFrame(cur.fetchall())
df_2.columns = [x[0] for x in cur.description]
df_2.head(100)

Unnamed: 0,Match_ID,Div,Season,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR
0,1092,D1,2011,2012-03-31,Nurnberg,Bayern Munich,0,1,A
1,1093,D1,2011,2011-12-11,Stuttgart,Bayern Munich,1,2,A
2,1094,D1,2011,2011-08-13,Wolfsburg,Bayern Munich,0,1,A
3,1095,D1,2011,2011-11-27,Mainz,Bayern Munich,3,2,H
4,1096,D1,2011,2012-02-18,Freiburg,Bayern Munich,0,0,D
...,...,...,...,...,...,...,...,...,...
95,1187,D1,2011,2012-03-04,Hoffenheim,FC Koln,1,1,D
96,1188,D1,2011,2011-09-17,Leverkusen,FC Koln,1,4,A
97,1189,D1,2011,2012-03-31,Augsburg,FC Koln,2,1,H
98,1190,D1,2011,2011-10-01,Hertha,FC Koln,3,0,H


In [18]:
len(df_2)

992

In [20]:
non_German_div_index = df_2[df_2['Div'] == "E0" ].index
df_2.drop(non_German_div_index , inplace=True)

In [22]:
len(df_2)

612

# Connecting to the DarkSky API

In [27]:
def get_keys(path):
    with open(path) as f:
        return json.load(f)

In [28]:
keys = get_keys("/Users/ravimalde/.secret/dark_sky_api.json")
api_key = keys['api_key']

In [66]:
latitude = 52.5200
longitude = 13.4050

date = "2020-01-30"
url = "https://api.darksky.net/forecast/{}/{},{},{}T16:00:00".format(api_key, latitude, longitude, date)
r = requests.get(url)

In [67]:
r.json().keys()

dict_keys(['latitude', 'longitude', 'timezone', 'currently', 'hourly', 'daily', 'flags', 'offset'])

In [68]:
r.json()['daily']

{'data': [{'time': 1580338800,
   'summary': 'Light rain until afternoon, starting again overnight.',
   'icon': 'rain',
   'sunriseTime': 1580367180,
   'sunsetTime': 1580399280,
   'moonPhase': 0.18,
   'precipIntensity': 0.0074,
   'precipIntensityMax': 0.0338,
   'precipIntensityMaxTime': 1580374380,
   'precipProbability': 0.8,
   'precipType': 'rain',
   'temperatureHigh': 45.37,
   'temperatureHighTime': 1580389200,
   'temperatureLow': 40.73,
   'temperatureLowTime': 1580413980,
   'apparentTemperatureHigh': 37.77,
   'apparentTemperatureHighTime': 1580389320,
   'apparentTemperatureLow': 34.74,
   'apparentTemperatureLowTime': 1580418420,
   'dewPoint': 37.22,
   'humidity': 0.87,
   'pressure': 1006.9,
   'windSpeed': 13.54,
   'windGust': 34.6,
   'windGustTime': 1580368080,
   'windBearing': 238,
   'cloudCover': 0.93,
   'uvIndex': 1,
   'uvIndexTime': 1580383440,
   'visibility': 10,
   'ozone': 338.1,
   'temperatureMin': 37.05,
   'temperatureMinTime': 1580349540,
   't

In [32]:
small_df = df_2.head(2)
small_df

Unnamed: 0,Match_ID,Div,Season,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR
0,1092,D1,2011,2012-03-31,Nurnberg,Bayern Munich,0,1,A
1,1093,D1,2011,2011-12-11,Stuttgart,Bayern Munich,1,2,A


In [39]:
latitude = 52.5200
longitude = 13.4050

for i, row in small_df.iterrows():
    
    date = df.loc[i, 'Date']
    url = "https://api.darksky.net/forecast/{}/{},{},{}T16:00:00".format(api_key, latitude, longitude, date)
    r = requests.get(url)
#     rain_probability = r.json()['currently']['precipProbability']
    weather_summary = r.json()['daily']['data'][0]['summary']
#     small_df['ChanceOfRain'] = rain_probability
    small_df['Weather'] = weather_summary

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if sys.path[0] == '':


In [34]:
small_df

Unnamed: 0,Match_ID,Div,Season,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,Weather
0,1092,D1,2011,2012-03-31,Nurnberg,Bayern Munich,0,1,A,Partly Cloudy
1,1093,D1,2011,2011-12-11,Stuttgart,Bayern Munich,1,2,A,Partly Cloudy


In [82]:
# latitude = 52.5200
# longitude = 13.4050

# for i, row in df_2.iterrows():
    
#     date = df_2.loc[i, 'Date']
#     url = "https://api.darksky.net/forecast/{}/{},{},{}T16:00:00".format(api_key, latitude, longitude, date)
#     r = requests.get(url)
#     weather_summary = r.json()['daily']['data'][0].get("icon", "clear")
#     df_2.loc[i, 'Weather'] = weather_summary

In [85]:
df_2.head()

Unnamed: 0,Match_ID,Div,Season,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,Weather
0,1092,D1,2011,2012-03-31,Nurnberg,Bayern Munich,0,1,A,rain
1,1093,D1,2011,2011-12-11,Stuttgart,Bayern Munich,1,2,A,partly-cloudy-day
2,1094,D1,2011,2011-08-13,Wolfsburg,Bayern Munich,0,1,A,partly-cloudy-day
3,1095,D1,2011,2011-11-27,Mainz,Bayern Munich,3,2,H,wind
4,1096,D1,2011,2012-02-18,Freiburg,Bayern Munich,0,0,D,partly-cloudy-day


In [101]:
df_2.to_csv("matches_with_weather.csv")

In [102]:
df_matches_with_weather = df_2.copy()

In [103]:
df_matches_with_weather.head()

Unnamed: 0,Match_ID,Div,Season,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,Weather
0,1092,D1,2011,2012-03-31,Nurnberg,Bayern Munich,0,1,A,rain
1,1093,D1,2011,2011-12-11,Stuttgart,Bayern Munich,1,2,A,partly-cloudy-day
2,1094,D1,2011,2011-08-13,Wolfsburg,Bayern Munich,0,1,A,partly-cloudy-day
3,1095,D1,2011,2011-11-27,Mainz,Bayern Munich,3,2,H,wind
4,1096,D1,2011,2012-02-18,Freiburg,Bayern Munich,0,0,D,partly-cloudy-day


In [105]:
df_matches_with_weather['Weather'].unique()

array(['rain', 'partly-cloudy-day', 'wind', 'clear', 'clear-day',
       'cloudy'], dtype=object)

In [95]:
df_matches_with_weather.drop(
    df_matches_with_weather[df_matches_with_weather['Weather'] != "rain"]
    .index, inplace=True)

In [96]:
len(df_matches_with_weather)

136

In [100]:
df_matches_with_weather.head()

Unnamed: 0,Match_ID,Div,Season,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,Weather
0,1092,D1,2011,2012-03-31,Nurnberg,Bayern Munich,0,1,A,rain
5,1097,D1,2011,2012-01-20,M'gladbach,Bayern Munich,3,1,H,rain
8,1100,D1,2011,2011-09-18,Schalke 04,Bayern Munich,0,2,A,rain
15,1107,D1,2011,2012-05-05,FC Koln,Bayern Munich,1,4,A,rain
18,1110,D1,2011,2011-12-17,Freiburg,Dortmund,1,4,A,rain


In [99]:
df_rainy_matches = df_matches_with_weather
df_rainy_matches.head()

Unnamed: 0,Match_ID,Div,Season,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,Weather
0,1092,D1,2011,2012-03-31,Nurnberg,Bayern Munich,0,1,A,rain
5,1097,D1,2011,2012-01-20,M'gladbach,Bayern Munich,3,1,H,rain
8,1100,D1,2011,2011-09-18,Schalke 04,Bayern Munich,0,2,A,rain
15,1107,D1,2011,2012-05-05,FC Koln,Bayern Munich,1,4,A,rain
18,1110,D1,2011,2011-12-17,Freiburg,Dortmund,1,4,A,rain


In [110]:
rainy_matches_dict = {}

for i, row in df_rainy_matches.iterrows():
    rainy_matches_dict[df_rainy_matches.loc[i, 'Match_ID']] = df_rainy_matches.loc[i,'Weather']

# rainy_matches_dict