# Soccer Team Match Statistics

Neda Jabbari, Erica Ho  
Date: 07/25/2019 

For a given team, we want to report the following:

- The name of the team
- The total number of goals scored by the team during the 2011 season
- A histogram visualization of the team's wins and losses for the 2011 season (store the visualization directly)
- The total number of wins the team earned during the 2011 season
- The team's win percentage on days where it was raining during games in the 2011 season.

The dataset is taken from https://www.kaggle.com/laudanum/footballdelphi. We retrieve the information from the Matches and FlatView_Advanced tables.

In [None]:
import sqlite3
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
#!pip install pandasql
from pandasql import sqldf
def pysqldf(q):
    """Run SQL query *q* with pandasql, resolving table names in this module's globals.

    Args:
        q: SQL text; DataFrames are referenced by their global variable name.

    Returns:
        Whatever ``sqldf`` returns for the query.
    """
    return sqldf(q, globals())
import requests

### Collect Data

Retrieve the German teams.

In [None]:
# Open the SQLite database file and create a cursor for running raw SQL on it.
conn = sqlite3.connect('database.sqlite')
c = conn.cursor()

In [None]:
# Load the whole Matches table into a DataFrame. read_sql_query takes the
# column names from the query result itself, so it also works when the table
# is empty (assigning .columns onto an empty DataFrame raises a length
# mismatch), and it matches how FlatView_Advanced is loaded further down.
Matches_df = pd.read_sql_query("""select * from Matches;""", conn)
Matches_df.head()
Matches_df.shape

In [None]:
# Drop repeated Match_ID rows in place (pandas keeps the first occurrence by default).
Matches_df.drop_duplicates(subset='Match_ID',inplace = True)   # afterwards each Match_ID appears once

To subset HomeTeams as German teams:
This is relevant to the last question where we get the weather information for only Germany.

In [None]:
# Read every row of FlatView_Advanced into a DataFrame.
# NOTE(review): the query has no country filter, so whether this frame holds
# only German teams depends on what FlatView_Advanced contains — confirm.
germanteams_df = pd.read_sql_query('''SELECT * 
                                      FROM FlatView_Advanced''', conn)

In [None]:
# Build a one-column frame, named 'team', of the distinct Unique_Team values.
teams_df= pd.DataFrame(germanteams_df.Unique_Team.unique())
teams_df.columns = ['team']
teams_df.head()

### The total number of goals scored by the team during the 2011 season. 

In [None]:
# Keep the Season = 2011 matches whose HomeTeam appears in teams_df.
# NOTE(review): the join filters on HomeTeam only; the away side of each match
# is not checked against teams_df — confirm that is intended.
q = """
       SELECT * 
       FROM Matches_df m
       INNER JOIN teams_df t
       ON m.HomeTeam= t.team
       WHERE Season =2011;
       """
Matches2011_df = pysqldf(q)
Matches2011_df.shape

In [None]:
# Sum the FTHG column per HomeTeam (presumably full-time home goals — confirm
# against the dataset's schema). The aggregate column is literally named
# 'sum(FTHG)' in the result.
q = """
       SELECT HomeTeam, sum(FTHG) 
       FROM Matches2011_df 
       GROUP BY HomeTeam;
       """

HomeTeam_results = pysqldf(q)
HomeTeam_results.head()

In [None]:
# Sum the FTAG column per AwayTeam (presumably full-time away goals — confirm
# against the dataset's schema). The aggregate column is literally named
# 'sum(FTAG)' in the result.
q = """
       SELECT AwayTeam, sum(FTAG) 
       FROM Matches2011_df 
       GROUP BY AwayTeam;
       """
AwayTeam_results = pysqldf(q)
AwayTeam_results.head()

In [None]:
# Put each team's home and away sums on one row by matching HomeTeam to AwayTeam.
# merge() defaults to an inner join, so a team present on only one side is dropped.
total_goals_df = pd.merge(HomeTeam_results, AwayTeam_results, left_on='HomeTeam', right_on='AwayTeam')
total_goals_df.head()

In [None]:
# Add the two aggregate columns (whose names are the quoted identifiers
# "sum(FTAG)" and "sum(FTHG)") into a single totalgoals value per team.
q = """select HomeTeam as Team, "sum(FTAG)"+"sum(FTHG)" as totalgoals from total_goals_df"""
totalgoals = pysqldf(q)
totalgoals.head()

In [None]:
def teamtotalgoals(team):
    """Return the rows of the module-level ``totalgoals`` frame for *team*.

    Args:
        team: value compared for equality against the 'Team' column.

    Returns:
        A DataFrame with the matching rows (empty when nothing matches).
    """
    is_requested_team = totalgoals['Team'] == team
    return totalgoals.loc[is_requested_team]

In [None]:
# Example: show the total-goals row for Aachen.
team = 'Aachen'
teamtotalgoals(team)

### A histogram visualization of the team's wins and losses for the 2011 season

In [None]:
def wins_loses(df, team):
    """Count how many matches in *df* the given team won and lost.

    A match is a win when the team is the HomeTeam and FTR is 'H', or the
    team is the AwayTeam and FTR is 'A'; it is a loss in the two opposite
    cases. Any other FTR value counts as neither.

    Args:
        df: DataFrame with 'HomeTeam', 'AwayTeam' and 'FTR' columns.
        team: team name to look up.

    Returns:
        Tuple ``(wins, loses)`` of plain ints.
    """
    # Nothing to count; also keeps a completely empty frame (no columns)
    # working, as the previous row loop simply never ran for it.
    if df.empty:
        return (0, 0)

    # Whole-column comparisons replace the per-row Python loop.
    plays_home = df['HomeTeam'] == team
    plays_away = df['AwayTeam'] == team
    home_side_won = df['FTR'] == 'H'
    away_side_won = df['FTR'] == 'A'

    wins = int(((plays_home & home_side_won) | (plays_away & away_side_won)).sum())
    loses = int(((plays_home & away_side_won) | (plays_away & home_side_won)).sum())
    return (wins, loses)
          

In [None]:
# Example: (wins, losses) for Nurnberg over the 2011 matches frame.
team = 'Nurnberg'
wins_loses(Matches2011_df, team)

In [None]:
def bar(df, team):
    """Draw a two-bar chart of *team*'s wins and losses in *df*.

    Args:
        df: matches frame passed straight to ``wins_loses``.
        team: team name passed straight to ``wins_loses``.

    Returns:
        The object returned by ``plt.bar``.
    """
    wins, losses = wins_loses(df, team)
    labels = ['win', 'loss']
    heights = [wins, losses]
    return plt.bar(labels, heights)

In [None]:
# Example: plot the win/loss bars for Nurnberg.
team = 'Nurnberg'
bar(Matches2011_df, team)

### The total number of wins the team earned during the 2011 season

In [None]:
def win_team(record):
    """Return the name of the side that scored more goals in *record*.

    Args:
        record: mapping/row with 'FTHG', 'FTAG', 'HomeTeam' and 'AwayTeam'.

    Returns:
        record['HomeTeam'] when FTHG > FTAG, record['AwayTeam'] when
        FTHG < FTAG, and None when neither comparison holds (a draw).
    """
    home_goals = record['FTHG']
    away_goals = record['FTAG']
    if home_goals > away_goals:
        return record['HomeTeam']
    if home_goals < away_goals:
        return record['AwayTeam']
    # Neither side is ahead: there is no winner.
    return None

In [None]:
# Add a 'Winner' column by calling win_team on every row (axis=1).
Matches2011_df['Winner'] = Matches2011_df.apply(win_team, axis=1)
Matches2011_df.head()

In [None]:
def numberofteamwins(team, matches=None):
    """Return how many rows list *team* in the 'Winner' column.

    Args:
        team: team name to count.
        matches: DataFrame with a 'Winner' column. Defaults to the
            module-level ``Matches2011_df``.

    Returns:
        The number of wins as an int; 0 when the team never appears as a
        winner (indexing ``value_counts()`` raised KeyError in that case).
    """
    if matches is None:
        matches = Matches2011_df
    return int((matches['Winner'] == team).sum())

In [None]:
# Example: number of 2011 wins for Bayern Munich.
team= 'Bayern Munich'
numberofteamwins(team)

### The team's win percentage on days where it was raining during games in the 2011 season

In [None]:
import json
def get_key(path):
    """Load and return the JSON document stored at *path*.

    Args:
        path: location of a JSON file.

    Returns:
        The decoded JSON value.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the content is not valid JSON.
    """
    # Read as UTF-8 explicitly; open() otherwise uses the platform's locale
    # encoding, which can mis-decode non-ASCII characters.
    with open(path, encoding="utf-8") as f:
        return json.load(f)

# Read the API credentials from a JSON file kept outside the notebook and
# pull out its 'api_key' entry.
# NOTE(review): the path is absolute and machine-specific; the notebook will
# not run elsewhere without changing it.
key = get_key("/Users/flatironschool/.secret/drysky.json")
api_key = key['api_key']

In [None]:
# Convert every match date to whole seconds since 1970-01-01 (Unix epoch).
list_of_dates = (pd.to_datetime(Matches2011_df['Date']) - pd.Timestamp("1970-01-01")) // pd.Timedelta('1s')
# Collapse to distinct timestamps so each date is requested once; set() does not preserve order.
game_dates = list(set(list_of_dates))
# One fixed location is used for every weather lookup.
# NOTE(review): these look like Berlin's coordinates — confirm a single
# location is acceptable for matches played at other venues.
latitude = 52.52
longtitude = 13.4050

def get_weather(game_dates):
    """Fetch one Dark Sky response for each timestamp in *game_dates*.

    Every request uses the module-level ``api_key``, ``latitude`` and
    ``longtitude`` and differs only in the time component of the URL.

    Args:
        game_dates: iterable of values placed in the URL as the time part.

    Returns:
        List of decoded JSON payloads, in the order of *game_dates*.

    Raises:
        requests.HTTPError: if a response has a 4xx/5xx status.
        requests.RequestException: on connection failure or timeout.
    """
    alldates = []
    for date in game_dates:
        url = f'''https://api.darksky.net/forecast/{api_key}/{latitude},{longtitude},{date}'''
        # Without a timeout, requests can wait forever on a stalled connection.
        response = requests.get(url, timeout=30)
        # Stop at the request that failed instead of storing an error payload
        # that only breaks later when its weather fields are read.
        response.raise_for_status()
        alldates.append(response.json())
    return alldates

In [None]:
# Download the weather payload for every distinct match date (one request per date).
allweather = get_weather(game_dates)
allweather

In [None]:
def rainydays(allweather):
    """Collect the timestamps of the weather payloads reporting rain.

    Args:
        allweather: iterable of payloads, each with a 'currently' mapping
            that has 'icon' and 'time' entries.

    Returns:
        List of ``payload['currently']['time']`` for every payload whose
        ``payload['currently']['icon']`` equals 'rain', in input order.
    """
    return [
        payload['currently']['time']
        for payload in allweather
        if payload['currently']['icon'] == 'rain'
    ]

# Timestamps of the downloaded payloads whose icon is 'rain'.
rain_dates= rainydays(allweather)

In [None]:
# Wrap the rainy timestamps in a one-column frame so they can be merged with the matches.
rain_dates_df = pd.DataFrame(rain_dates, columns = ['Timestamp_Date'])
rain_dates_df.head()

In [None]:
# Store each match date as Unix epoch seconds so the matches can be joined to
# the rainy-day timestamps. Assign the series computed on the line above; the
# cell previously assigned a similarly named variable left over from an
# earlier cell, so it only worked if that cell had already been run.
listofdates = (pd.to_datetime(Matches2011_df['Date']) - pd.Timestamp("1970-01-01")) // pd.Timedelta('1s')
Matches2011_df['Timestamp_AllDates'] = listofdates
Matches2011_df.head()

In [None]:
# Same epoch-seconds conversion, collected into a stand-alone one-column frame.
# NOTE(review): all_dates_df is not referenced by any later cell shown here —
# confirm it is still needed.
listofdates = (pd.to_datetime(Matches2011_df['Date']) - pd.Timestamp("1970-01-01")) // pd.Timedelta('1s')
listofdates = list(listofdates)
all_dates_df = pd.DataFrame(listofdates, columns= ['Timestamp_AllDates'])
all_dates_df.head()

In [None]:
# Keep only the matches whose timestamp appears among the rainy timestamps
# (merge() defaults to an inner join).
merged=Matches2011_df.merge(rain_dates_df, left_on='Timestamp_AllDates', right_on='Timestamp_Date')
merged.head()

In [None]:
def winpercentage(team, df=None):
    """Return the percentage of *team*'s matches in *df* that it won.

    Games are the rows where the team is the HomeTeam plus the rows where it
    is the AwayTeam; wins are the rows whose 'Winner' equals the team.

    Args:
        team: team name to evaluate.
        df: DataFrame with 'HomeTeam', 'AwayTeam' and 'Winner' columns.
            Defaults to the module-level ``merged`` frame (rainy-day matches).

    Returns:
        ``wins * 100 / games`` as a float, or NaN when the team has no games
        in the frame (the percentage is undefined then).
    """
    if df is None:
        df = merged

    # Counting with boolean masks yields 0 for an absent team, so the win
    # count is always defined (it used to be unbound for a team with no wins).
    wins = int((df['Winner'] == team).sum())
    home_games = int((df['HomeTeam'] == team).sum())
    away_games = int((df['AwayTeam'] == team).sum())
    games = home_games + away_games

    # Avoid dividing by zero for a team that did not play in the frame.
    if games == 0:
        return float('nan')
    return wins * 100 / games

In [None]:
# Example: rainy-day win percentage for Paderborn.
team = 'Paderborn'
winpercentage(team)