# ETL Project
### ***NFL Draft Analysis***

In [1]:
#Import Dependencies
import pandas as pd
from sqlalchemy import create_engine
import psycopg2
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, func, inspect

##### Create secrets.py file with username and password to pgadmin

In [2]:
#Import username and password from secrets.py file
#from secrets import username, password, database_name

# Extract

##### Extract the data from Wikipedia and  www.pro-football-reference.com. 
* Note: Since we are scraping data from tables we are using pandas otherwise we were going to need to use BeatifulSoup and Splinter

### Extract Combine Info from 2016-2020

In [3]:
#Assign Combine URL's to variable
combine_url_2016 =  'https://www.pro-football-reference.com/draft/2016-combine.htm'
combine_url_2017 = 'https://www.pro-football-reference.com/draft/2017-combine.htm'
combine_url_2018 = 'https://www.pro-football-reference.com/draft/2018-combine.htm'
combine_url_2019 = 'https://www.pro-football-reference.com/draft/2019-combine.htm'
combine_url_2020 = 'https://www.pro-football-reference.com/draft/2020-combine.htm#combine'

In [4]:
#Use Pandas to read in html tables from combine URL's
combine_2016 = pd.read_html(combine_url_2016)
combine_2017 = pd.read_html(combine_url_2017)
combine_2018 = pd.read_html(combine_url_2018)
combine_2019 = pd.read_html(combine_url_2019)
combine_2020 = pd.read_html(combine_url_2020)

In [5]:
#Extract the 0th HTML Table to get the necessary combine info
combine_df_2016 = combine_2016[0]
combine_df_2017 = combine_2017[0]
combine_df_2018 = combine_2018[0]
combine_df_2019 = combine_2019[0]
combine_df_2020 = combine_2020[0]
#Preiew the 2016 Combine DF - Combine Extraction Complete!
combine_df_2016

Unnamed: 0,Player,Pos,School,College,Ht,Wt,40yd,Vertical,Bench,Broad Jump,3Cone,Shuttle,Drafted (tm/rnd/yr)
0,Mehdi Abdesmad,DE,Boston Col.,College Stats,6-6,284,5.10,29.5,25,108,7.55,4.62,
1,Vernon Adams,QB,Oregon,College Stats,5-11,200,4.83,29.5,,114,6.82,4.20,
2,Jerell Adams,TE,South Carolina,College Stats,6-5,247,4.64,32.5,,117,7.05,4.31,New York Giants / 6th / 184th pick / 2016
3,Bralon Addison,WR,Oregon,College Stats,5-9,197,4.66,34.5,13,116,6.95,4.14,
4,Roberto Aguayo,K,Florida State,College Stats,6-0,207,4.96,,,,,,Tampa Bay Buccaneers / 2nd / 59th pick / 2016
...,...,...,...,...,...,...,...,...,...,...,...,...,...
333,Daryl Worley,CB,West Virginia,College Stats,6-1,204,4.64,35.5,14,123,6.98,4.15,Carolina Panthers / 3rd / 77th pick / 2016
334,Connor Wujciak,DT,Boston Col.,College Stats,6-2,291,4.91,34.5,22,117,7.32,4.27,
335,Tavon Young,CB,Temple,College Stats,5-9,183,4.46,34.5,9,118,6.80,3.93,Baltimore Ravens / 4th / 104th pick / 2016
336,Avery Young,OT,Auburn,College Stats,6-5,328,5.39,,,,8.22,4.91,


### Extract Draft Info from 2016-2020

In [6]:
#Assign Draft URL's to variable
draft_url_2016 = 'https://en.wikipedia.org/wiki/2016_NFL_Draft'
draft_url_2017 = 'https://en.wikipedia.org/wiki/2017_NFL_Draft'
draft_url_2018 = 'https://en.wikipedia.org/wiki/2018_NFL_Draft'
draft_url_2019 = 'https://en.wikipedia.org/wiki/2019_NFL_Draft'
draft_url_2020 = 'https://en.wikipedia.org/wiki/2020_NFL_Draft'

In [7]:
#Use Pandas to read in html tables from draft URL's
draft_2016 = pd.read_html(draft_url_2016)
draft_2017 = pd.read_html(draft_url_2017)
draft_2018 = pd.read_html(draft_url_2018)
draft_2019 = pd.read_html(draft_url_2019)
draft_2020 = pd.read_html(draft_url_2020)

In [8]:
#Extract the 4th HTML Table to get the necessary combine info
draft_df_2016 = draft_2016[4]
draft_df_2017 = draft_2017[4]
draft_df_2018 = draft_2018[4]
draft_df_2019 = draft_2019[4]
draft_df_2020 = draft_2020[4]
#Preview the 2016 Draft DF - Draft Extraction Complete!
draft_df_2016

Unnamed: 0.1,Unnamed: 0,Rnd.,Pick No.,NFL team,Player,Pos.,College,Conf.,Notes
0,,1,1.0,Los Angeles Rams,Jared Goff †,QB,California,Pac-12,from Tennessee [R1 - 1]
1,,1,2.0,Philadelphia Eagles,Carson Wentz †,QB,North Dakota State,MVFC,from Cleveland [R1 - 2]
2,,1,3.0,San Diego Chargers,Joey Bosa †,DE,Ohio State,Big Ten,
3,,1,4.0,Dallas Cowboys,Ezekiel Elliott †,RB,Ohio State,Big Ten,
4,,1,5.0,Jacksonville Jaguars,Jalen Ramsey †,CB,Florida State,ACC,
...,...,...,...,...,...,...,...,...,...
252,,7,249.0,San Francisco 49ers,Prince Charles Iworah,CB,Western Kentucky,C-USA,from Kansas City [R7 - 11]
253,,7,250.0,Cleveland Browns,Scooby Wright,LB,Arizona,Pac-12,from New England via Miami [R7 - 12]
254,,7,251.0,Philadelphia Eagles,Joe Walker,LB,Oregon,Pac-12,from Arizona [R7 - 13]
255,,7,252.0,Carolina Panthers,Beau Sandland,TE,Montana State,Big Sky,


# Transform

### Concatenate and Clean Up Combine Data

In [9]:
#Add year to each combine DF
combine_df_2016['year']='2016'
combine_df_2017['year']='2017'
combine_df_2018['year']='2018'
combine_df_2019['year']='2019'
combine_df_2020['year']='2020'
#Add year to each draft DF
draft_df_2016['year']='2016'
draft_df_2017['year']='2017'
draft_df_2018['year']='2018'
draft_df_2019['year']='2019'
draft_df_2020['year']='2020'

In [10]:
#Preview 2016 Combine Data with Year Column Added
combine_df_2016

Unnamed: 0,Player,Pos,School,College,Ht,Wt,40yd,Vertical,Bench,Broad Jump,3Cone,Shuttle,Drafted (tm/rnd/yr),year
0,Mehdi Abdesmad,DE,Boston Col.,College Stats,6-6,284,5.10,29.5,25,108,7.55,4.62,,2016
1,Vernon Adams,QB,Oregon,College Stats,5-11,200,4.83,29.5,,114,6.82,4.20,,2016
2,Jerell Adams,TE,South Carolina,College Stats,6-5,247,4.64,32.5,,117,7.05,4.31,New York Giants / 6th / 184th pick / 2016,2016
3,Bralon Addison,WR,Oregon,College Stats,5-9,197,4.66,34.5,13,116,6.95,4.14,,2016
4,Roberto Aguayo,K,Florida State,College Stats,6-0,207,4.96,,,,,,Tampa Bay Buccaneers / 2nd / 59th pick / 2016,2016
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
333,Daryl Worley,CB,West Virginia,College Stats,6-1,204,4.64,35.5,14,123,6.98,4.15,Carolina Panthers / 3rd / 77th pick / 2016,2016
334,Connor Wujciak,DT,Boston Col.,College Stats,6-2,291,4.91,34.5,22,117,7.32,4.27,,2016
335,Tavon Young,CB,Temple,College Stats,5-9,183,4.46,34.5,9,118,6.80,3.93,Baltimore Ravens / 4th / 104th pick / 2016,2016
336,Avery Young,OT,Auburn,College Stats,6-5,328,5.39,,,,8.22,4.91,,2016


In [11]:
#Combine 2016-2020 Combine DF's into 1 DF
combine_frames = [combine_df_2016, combine_df_2017, combine_df_2018, combine_df_2019, combine_df_2020]
combined_combine_df = pd.concat(combine_frames)

#Drop and Rename Columns
combined_combine_df = combined_combine_df.drop(columns = ['Drafted (tm/rnd/yr)', 'College'])
combined_combine_df = combined_combine_df.rename(columns = {'Player': 'name', 'Ht': 'Height', 'Wt': 'Weight', '40yd': 'Forty_Yard',
                                                            '3Cone': 'Three_Cone', 'year': 'Year', 'Broad Jump': 'Broad_Jump',
                                                           'Pos': 'position'})
#Preview the Combined combine DF
combined_combine_df

Unnamed: 0,name,position,School,Height,Weight,Forty_Yard,Vertical,Bench,Broad_Jump,Three_Cone,Shuttle,Year
0,Mehdi Abdesmad,DE,Boston Col.,6-6,284,5.10,29.5,25,108,7.55,4.62,2016
1,Vernon Adams,QB,Oregon,5-11,200,4.83,29.5,,114,6.82,4.20,2016
2,Jerell Adams,TE,South Carolina,6-5,247,4.64,32.5,,117,7.05,4.31,2016
3,Bralon Addison,WR,Oregon,5-9,197,4.66,34.5,13,116,6.95,4.14,2016
4,Roberto Aguayo,K,Florida State,6-0,207,4.96,,,,,,2016
...,...,...,...,...,...,...,...,...,...,...,...,...
338,D.J. Wonnum,DL,South Carolina,6-5,258,4.73,34.5,20,123,7.25,4.44,2020
339,Dom Wood-Anderson,TE,Tennessee,6-4,261,4.92,35.0,,119,,,2020
340,David Woodward,LB,Utah State,6-2,230,4.79,33.5,16,114,7.34,4.37,2020
341,Chase Young,DL,Ohio State,6-5,264,,,,,,,2020


In [12]:
#Convert Height to Float
def fix(string):
    try:
        feet = int(string.split('-')[0])
    
        inches = int(string.split('-')[1])

    #thats for feet and inches
        return feet * 12 + inches
    except:
        return string
    
combined_combine_df['Height'] = combined_combine_df['Height'].apply(fix)
#Preview the Complete total combine DF - Combine DF Cleanup Complete!
combined_combine_df

Unnamed: 0,name,position,School,Height,Weight,Forty_Yard,Vertical,Bench,Broad_Jump,Three_Cone,Shuttle,Year
0,Mehdi Abdesmad,DE,Boston Col.,78,284,5.10,29.5,25,108,7.55,4.62,2016
1,Vernon Adams,QB,Oregon,71,200,4.83,29.5,,114,6.82,4.20,2016
2,Jerell Adams,TE,South Carolina,77,247,4.64,32.5,,117,7.05,4.31,2016
3,Bralon Addison,WR,Oregon,69,197,4.66,34.5,13,116,6.95,4.14,2016
4,Roberto Aguayo,K,Florida State,72,207,4.96,,,,,,2016
...,...,...,...,...,...,...,...,...,...,...,...,...
338,D.J. Wonnum,DL,South Carolina,77,258,4.73,34.5,20,123,7.25,4.44,2020
339,Dom Wood-Anderson,TE,Tennessee,76,261,4.92,35.0,,119,,,2020
340,David Woodward,LB,Utah State,74,230,4.79,33.5,16,114,7.34,4.37,2020
341,Chase Young,DL,Ohio State,77,264,,,,,,,2020


In [13]:
combine_table_df = combined_combine_df.drop(columns = ['position', 'School', 'Height', 'Weight', 'Year'])
combine_table_df

Unnamed: 0,name,Forty_Yard,Vertical,Bench,Broad_Jump,Three_Cone,Shuttle
0,Mehdi Abdesmad,5.10,29.5,25,108,7.55,4.62
1,Vernon Adams,4.83,29.5,,114,6.82,4.20
2,Jerell Adams,4.64,32.5,,117,7.05,4.31
3,Bralon Addison,4.66,34.5,13,116,6.95,4.14
4,Roberto Aguayo,4.96,,,,,
...,...,...,...,...,...,...,...
338,D.J. Wonnum,4.73,34.5,20,123,7.25,4.44
339,Dom Wood-Anderson,4.92,35.0,,119,,
340,David Woodward,4.79,33.5,16,114,7.34,4.37
341,Chase Young,,,,,,


### Concatenate and Clean Up Draft Data

In [14]:
#Preview the 2016 Draft DF with year column added
draft_df_2016

Unnamed: 0.1,Unnamed: 0,Rnd.,Pick No.,NFL team,Player,Pos.,College,Conf.,Notes,year
0,,1,1.0,Los Angeles Rams,Jared Goff †,QB,California,Pac-12,from Tennessee [R1 - 1],2016
1,,1,2.0,Philadelphia Eagles,Carson Wentz †,QB,North Dakota State,MVFC,from Cleveland [R1 - 2],2016
2,,1,3.0,San Diego Chargers,Joey Bosa †,DE,Ohio State,Big Ten,,2016
3,,1,4.0,Dallas Cowboys,Ezekiel Elliott †,RB,Ohio State,Big Ten,,2016
4,,1,5.0,Jacksonville Jaguars,Jalen Ramsey †,CB,Florida State,ACC,,2016
...,...,...,...,...,...,...,...,...,...,...
252,,7,249.0,San Francisco 49ers,Prince Charles Iworah,CB,Western Kentucky,C-USA,from Kansas City [R7 - 11],2016
253,,7,250.0,Cleveland Browns,Scooby Wright,LB,Arizona,Pac-12,from New England via Miami [R7 - 12],2016
254,,7,251.0,Philadelphia Eagles,Joe Walker,LB,Oregon,Pac-12,from Arizona [R7 - 13],2016
255,,7,252.0,Carolina Panthers,Beau Sandland,TE,Montana State,Big Sky,,2016


In [15]:
#Combine 2016-2020 Draft DF's into 1 DF
draft_frames = [draft_df_2016, draft_df_2017, draft_df_2018, draft_df_2019, draft_df_2020]
combined_draft_df = pd.concat(draft_frames)
#Drop and Rename Draft DF Columns
combined_draft_df = combined_draft_df.drop(columns = ['Notes', 'Unnamed: 0',])
combined_draft_df = combined_draft_df.rename(columns = {"Player": "name", "NFL team": "NFL_Team", "Pos.": "position", 
                                                        "College":"School", "Conf.": "Conf", 
                                                        "Rnd.": "Round", "Pick No.": "Pick_No", "year": "Year"})
#Preview the combined draft DF
combined_draft_df

Unnamed: 0,Round,Pick_No,NFL_Team,name,position,School,Conf,Year
0,1,1.0,Los Angeles Rams,Jared Goff †,QB,California,Pac-12,2016
1,1,2.0,Philadelphia Eagles,Carson Wentz †,QB,North Dakota State,MVFC,2016
2,1,3.0,San Diego Chargers,Joey Bosa †,DE,Ohio State,Big Ten,2016
3,1,4.0,Dallas Cowboys,Ezekiel Elliott †,RB,Ohio State,Big Ten,2016
4,1,5.0,Jacksonville Jaguars,Jalen Ramsey †,CB,Florida State,ACC,2016
...,...,...,...,...,...,...,...,...
251,7*,251.0,Seattle Seahawks,Stephen Sullivan,TE,LSU,SEC,2020
252,7*,252.0,Denver Broncos,Tyrie Cleveland,WR,Florida,SEC,2020
253,7*,253.0,Minnesota Vikings,Kyle Hinton,C,Washburn,MIAA,2020
254,7*,254.0,Denver Broncos,Derrek Tuszka,DE,North Dakota State,MVFC,2020


In [16]:
teams_df = combined_draft_df.drop(columns = ['Round','Pick_No', 'position','School','Conf','Year'])
teams_df

Unnamed: 0,NFL_Team,name
0,Los Angeles Rams,Jared Goff †
1,Philadelphia Eagles,Carson Wentz †
2,San Diego Chargers,Joey Bosa †
3,Dallas Cowboys,Ezekiel Elliott †
4,Jacksonville Jaguars,Jalen Ramsey †
...,...,...
251,Seattle Seahawks,Stephen Sullivan
252,Denver Broncos,Tyrie Cleveland
253,Minnesota Vikings,Kyle Hinton
254,Denver Broncos,Derrek Tuszka


In [17]:
draft_table_df = combined_draft_df.drop(columns = ['NFL_Team', 'position','School','Conf','Year'])
draft_table_df

Unnamed: 0,Round,Pick_No,name
0,1,1.0,Jared Goff †
1,1,2.0,Carson Wentz †
2,1,3.0,Joey Bosa †
3,1,4.0,Ezekiel Elliott †
4,1,5.0,Jalen Ramsey †
...,...,...,...
251,7*,251.0,Stephen Sullivan
252,7*,252.0,Tyrie Cleveland
253,7*,253.0,Kyle Hinton
254,7*,254.0,Derrek Tuszka


In [18]:
info_frames = [combined_combine_df, combined_draft_df]
#Combine both player dataframes

combined_info_df = pd.concat(info_frames)
#Drop duplicate players in combined_player_df
info_df = combined_info_df.drop_duplicates(subset='name', keep='first', ignore_index=True)
# Drop columns we are not using for the info table
info_df = info_df.drop(columns = ['Forty_Yard','Vertical','Bench','Broad_Jump','Three_Cone','Shuttle',
'Round','Pick_No','NFL_Team',])
#-----------------------------------------------------------------
# Define School DF
schools_df = info_df.drop(columns = ['position', 'Height','Weight', 'Year'])
#-----------------------------------------------------------------

#Drop School Column
info_df = info_df.drop(columns = ['School','Conf'])


#Preview combined_player_df
info_df

Unnamed: 0,name,position,Height,Weight,Year
0,Mehdi Abdesmad,DE,78,284,2016
1,Vernon Adams,QB,71,200,2016
2,Jerell Adams,TE,77,247,2016
3,Bralon Addison,WR,69,197,2016
4,Roberto Aguayo,K,72,207,2016
...,...,...,...,...,...
2001,Chris Jackson,CB,,,2020
2002,Chris Williamson,CB,,,2020
2003,Sam Sloman,K,,,2020
2004,Kyle Hinton,C,,,2020


#### School DataFrame


In [19]:
schools_df

Unnamed: 0,name,School,Conf
0,Mehdi Abdesmad,Boston Col.,
1,Vernon Adams,Oregon,
2,Jerell Adams,South Carolina,
3,Bralon Addison,Oregon,
4,Roberto Aguayo,Florida State,
...,...,...,...
2001,Chris Jackson,Marshall,C-USA
2002,Chris Williamson,Minnesota,Big Ten
2003,Sam Sloman,Miami (OH),MAC
2004,Kyle Hinton,Washburn,MIAA


##### Need to get one single dataframe of just the players

In [20]:
players_table_df = pd.DataFrame(info_df['name'])
players_table_df

Unnamed: 0,name
0,Mehdi Abdesmad
1,Vernon Adams
2,Jerell Adams
3,Bralon Addison
4,Roberto Aguayo
...,...
2001,Chris Jackson
2002,Chris Williamson
2003,Sam Sloman
2004,Kyle Hinton


# Check dataframes before loading

In [21]:
# Check dataframes before loading

# players_table_df
# info_df
# teams_df
# combine_table_df
# draft_table_df

players_table_df

Unnamed: 0,name
0,Mehdi Abdesmad
1,Vernon Adams
2,Jerell Adams
3,Bralon Addison
4,Roberto Aguayo
...,...
2001,Chris Jackson
2002,Chris Williamson
2003,Sam Sloman
2004,Kyle Hinton


In [22]:
info_df

Unnamed: 0,name,position,Height,Weight,Year
0,Mehdi Abdesmad,DE,78,284,2016
1,Vernon Adams,QB,71,200,2016
2,Jerell Adams,TE,77,247,2016
3,Bralon Addison,WR,69,197,2016
4,Roberto Aguayo,K,72,207,2016
...,...,...,...,...,...
2001,Chris Jackson,CB,,,2020
2002,Chris Williamson,CB,,,2020
2003,Sam Sloman,K,,,2020
2004,Kyle Hinton,C,,,2020


In [41]:
schools_df


Unnamed: 0,name,School,Conf
0,Mehdi Abdesmad,Boston Col.,
1,Vernon Adams,Oregon,
2,Jerell Adams,South Carolina,
3,Bralon Addison,Oregon,
4,Roberto Aguayo,Florida State,
...,...,...,...
2001,Chris Jackson,Marshall,C-USA
2002,Chris Williamson,Minnesota,Big Ten
2003,Sam Sloman,Miami (OH),MAC
2004,Kyle Hinton,Washburn,MIAA


In [24]:
teams_df

Unnamed: 0,NFL_Team,name
0,Los Angeles Rams,Jared Goff †
1,Philadelphia Eagles,Carson Wentz †
2,San Diego Chargers,Joey Bosa †
3,Dallas Cowboys,Ezekiel Elliott †
4,Jacksonville Jaguars,Jalen Ramsey †
...,...,...
251,Seattle Seahawks,Stephen Sullivan
252,Denver Broncos,Tyrie Cleveland
253,Minnesota Vikings,Kyle Hinton
254,Denver Broncos,Derrek Tuszka


In [25]:
combine_table_df

Unnamed: 0,name,Forty_Yard,Vertical,Bench,Broad_Jump,Three_Cone,Shuttle
0,Mehdi Abdesmad,5.10,29.5,25,108,7.55,4.62
1,Vernon Adams,4.83,29.5,,114,6.82,4.20
2,Jerell Adams,4.64,32.5,,117,7.05,4.31
3,Bralon Addison,4.66,34.5,13,116,6.95,4.14
4,Roberto Aguayo,4.96,,,,,
...,...,...,...,...,...,...,...
338,D.J. Wonnum,4.73,34.5,20,123,7.25,4.44
339,Dom Wood-Anderson,4.92,35.0,,119,,
340,David Woodward,4.79,33.5,16,114,7.34,4.37
341,Chase Young,,,,,,


In [26]:
draft_table_df

Unnamed: 0,Round,Pick_No,name
0,1,1.0,Jared Goff †
1,1,2.0,Carson Wentz †
2,1,3.0,Joey Bosa †
3,1,4.0,Ezekiel Elliott †
4,1,5.0,Jalen Ramsey †
...,...,...,...
251,7*,251.0,Stephen Sullivan
252,7*,252.0,Tyrie Cleveland
253,7*,253.0,Kyle Hinton
254,7*,254.0,Derrek Tuszka


NameError: name 'a' is not defined

# Load

In [42]:
#Connect to local database
rds_connection_string = f'{username}:{password}@localhost:5432/{database_name}'
engine = create_engine(f'{username}+psycopg2://{rds_connection_string}')

NameError: name 'username' is not defined

In [None]:
# Check the tables names to make sure where are we posting
engine.table_names()

In [None]:
# check the keys of the tables to make sure everything match exactly.
#if match EXACTLY WITH THE SAME NAME FOR COLUMNS IN THE DATAFRAME AND IN THE TABLE KEYS GO TO NEXT CELL.
combine_table=engine.execute('SELECT * FROM combine')
players_table=engine.execute('SELECT * FROM player')
draft_table=engine.execute('SELECT * FROM draft')
print(combine_table.keys())
print(players_table.keys())
print(draft_table.keys())

In [None]:
# players_table_df
# info_df
# teams_df
# combine_table_df
# draft_table_df
# schools_df

In [None]:
# Load data using pandas. name stands for table name, change it if neccesary.
players_table_df.to_sql(name='player', con=engine, if_exists='append', index=False)

In [None]:
#Confirm the data has been Load. Check table name. Create a dataframe to get player_id to the other dataframes
player_id_df=pd.read_sql_query('select * from player', con=engine)
player_id_df

In [None]:
info_table_df = pd.merge(info_df, player_id_df, how = 'inner', on = 'name')
info_table_df = info_table_df.drop(columns = 'name')

## convert Height and Weight to numeric
info_table_df['Height'] = pd.to_numeric(info_table_df['Height'], errors='coerce')
#com_df_final['Ht'].convert_dtypes(infer_objects=True, convert_string=True)
info_table_df['Weight'] = pd.to_numeric(info_table_df['Height'], errors='coerce')
info_table_df['position'] = info_table_df['position'].convert_dtypes(infer_objects=True, convert_string=True)

info_table_df.info()

In [None]:
teams_table_df = pd.merge(teams_df, player_id_df, how = 'inner', on = 'name')
teams_table_df = info_table_df.drop(columns = 'name')

teams_table_df['NFL_Team'] = teams_table_df['position'].convert_dtypes(infer_objects=True, convert_string=True)

teams_table_df

In [None]:
combine_df_final = pd.merge(combine_table_df, player_id_df, how = 'inner', on = 'name')
combine_df_final = combine_table_df.drop(columns = 'name')

# Convert
##### The columns names needs to be changed
combine_df_final['Forty_Yard'] = pd.to_numeric(combine_df_final['Forty_Yard'], errors='coerce')
combine_df_final['Bench'] = pd.to_numeric(combine_df_final['Bench'], errors='coerce')
combine_df_final['Vertical'] = pd.to_numeric(combine_df_final['Vertical'], errors='coerce')
combine_df_final['Broad_Jump'] = pd.to_numeric(combine_df_final['Broad Jump'], errors='coerce')
combine_df_final['Three_Cone'] = pd.to_numeric(combine_df_final['Three_Cone'], errors='coerce')
combine_df_final['Shuttle'] = pd.to_numeric(combine_df_final['Shuttle'], errors='coerce')
# Get the info of the dataframe
combine_df_final.info()



In [None]:
# Convert types to fit in the database
#### we are using to numeric for floats and convert_dtypes method for string
#### Height needs to be string because is in 6 feet 1 inch format. others are float
combine_df_final['Ht'] = combine_df_final['Ht'].convert_dtypes(infer_objects=True, convert_string=True)
combine_df_final['Wt'] = pd.to_numeric(combine_df_final['Wt'], errors='coerce')
combine_df_final['40yd'] = pd.to_numeric(combine_df_final['40yd'], errors='coerce')
combine_df_final['Bench'] = pd.to_numeric(combine_df_final['Bench'], errors='coerce')
combine_df_final['Vertical'] = pd.to_numeric(combine_df_final['Vertical'], errors='coerce')
combine_df_final['Broad Jump'] = pd.to_numeric(combine_df_final['Broad Jump'], errors='coerce')
combine_df_final['3Cone'] = pd.to_numeric(combine_df_final['3Cone'], errors='coerce')
combine_df_final['Shuttle'] = pd.to_numeric(combine_df_final['Shuttle'], errors='coerce')
# Get the info of the dataframe
combine_df_final.info()

In [None]:
# merge with draft dataframe to get the serial id.
draft_df_final = pd.merge(draft_table_df, player_id_df, how='inner', on='player')
draft_df_final = draft_table_df.drop(columns = 'name')

# # Convert types to fit in the database
 
draft_df_final['Round'] = pd.to_numeric(draft_df_final['Round'], errors='coerce')
draft_df_final['Pick_no'] = pd.to_numeric(draft_df_final['Pick_no'], errors='coerce')
# draft_df_final['Pick_no'].convert_dtypes(infer_objects=True, convert_integer=True)
draft_df_final['Conf'] = draft_df_final['Conf'].convert_dtypes(infer_objects=True, convert_string=True)


draft_df_final.info()

In [None]:
schools_df_final = pd.merge(schools_df, player_id_df, how='inner', on='player')

schools_df_final = schools_df_final.drop(columns = 'name')

schools_df_final['School'] = schools_df_final['School'].convert_dtypes(infer_objects=True, convert_string=True)
schools_df_final['Conf'] = schools_df_final['Conf'].convert_dtypes(infer_objects=True, convert_string=True)

schools_df_final.info()

In [None]:
# Load data using pandas. name stands for table name, change it if neccesary.
info_table_df.to_sql(name='info', con=engine, if_exists='append', index=False)
teams_table_df.to_sql(name='teams', con=engine, if_exists='append', index=False)
combine_table_df.to_sql(name='combine', con=engine, if_exists='append', index=False)
draft_df_final.to_sql(name='draft', con=engine, if_exists='append', index=False)
schools_df_final.to_sql(name='draft', con=engine, if_exists='append', index=False)

### Load DataFrames


In [None]:
# Load data using pandas. name stands for table name, change it if neccesary.
info_table_df.to_sql(name='info', con=engine, if_exists='append', index=False)
teams_table_df.to_sql(name='teams', con=engine, if_exists='append', index=False)
combine_table_df.to_sql(name='combine', con=engine, if_exists='append', index=False)
draft_df_final.to_sql(name='draft', con=engine, if_exists='append', index=False)
schools_df_final.to_sql(name='draft', con=engine, if_exists='append', index=False)

In [None]:
# Clean the combine df final by dropping columns we don't need and renaming id

combine_df_final = combine_df_final.drop(columns = ['school', 'School','player', 'year_y', 'pos', 'Pos', 'year_x'])
combine_df_final = combine_df_final.rename(columns = {'id':'player_id'})

# Convert types to fit in the database
#### we are using to numeric for floats and convert_dtypes method for string
#### Height needs to be string because is in 6 feet 1 inch format. others are float
combine_df_final['Ht'] = combine_df_final['Ht'].convert_dtypes(infer_objects=True, convert_string=True)
combine_df_final['Wt'] = pd.to_numeric(combine_df_final['Wt'], errors='coerce')
combine_df_final['40yd'] = pd.to_numeric(combine_df_final['40yd'], errors='coerce')
combine_df_final['Bench'] = pd.to_numeric(combine_df_final['Bench'], errors='coerce')
combine_df_final['Vertical'] = pd.to_numeric(combine_df_final['Vertical'], errors='coerce')
combine_df_final['Broad Jump'] = pd.to_numeric(combine_df_final['Broad Jump'], errors='coerce')
combine_df_final['3Cone'] = pd.to_numeric(combine_df_final['3Cone'], errors='coerce')
combine_df_final['Shuttle'] = pd.to_numeric(combine_df_final['Shuttle'], errors='coerce')
# Get the info of the dataframe
combine_df_final.info()

In [None]:
# Clean the draft df final by dropping columns we don't need and renaming id

draft_df_final = draft_df_final.drop(columns = ['school','School','player', 'year_y', 'pos', 'Pos', 'year_x'])
draft_df_final = draft_df_final.rename(columns = {'id':'player_id'})

# # Convert types to fit in the database
### Rnd coulb be either a string or a float because we are not supossed to do any math operation with it. 
draft_df_final['Rnd'] = pd.to_numeric(draft_df_final['Rnd'], errors='coerce')
draft_df_final['Pick_no'] = draft_df_final['Pick_no'].convert_dtypes(infer_objects=True, convert_integer=True)
draft_df_final['Conf'] = draft_df_final['Conf'].convert_dtypes(infer_objects=True, convert_string=True)


draft_df_final.info()

In [None]:
# Database ready to work