### Import Dependencies

In [9]:
import pandas as pd
import numpy as np

### Read CSV file.

In [10]:
df = pd.read_csv("../scraped_csv/team_details/teamFranchiseDetails.csv")
df

Unnamed: 0,id,name,link,abbreviation,teamName,locationName,firstYearOfPlay,shortName,officialSiteUrl,franchiseId,...,division.name,division.nameShort,division.link,division.abbreviation,conference.id,conference.name,conference.link,franchise.franchiseId,franchise.teamName,franchise.link
0,10,Toronto Maple Leafs,/api/v1/teams/10,TOR,Maple Leafs,Toronto,1917,Toronto,http://www.mapleleafs.com/,5,...,Atlantic,ATL,/api/v1/divisions/17,A,6,Eastern,/api/v1/conferences/6,5,Maple Leafs,/api/v1/franchises/5


### Hard to see all columns, so lets see the transposed dataframe instead.

In [11]:
df = df.transpose()
df

Unnamed: 0,0
id,10
name,Toronto Maple Leafs
link,/api/v1/teams/10
abbreviation,TOR
teamName,Maple Leafs
locationName,Toronto
firstYearOfPlay,1917
shortName,Toronto
officialSiteUrl,http://www.mapleleafs.com/
franchiseId,5


### Looking at database schema, we only need some of these columns, so I will now remove columns that are not needed.

In [12]:
dropped_columns = [
    'link', 
    'teamName', 
    'shortName', 
    'franchiseId', 
    'active', 
    'venue.link', 
    'venue.city', 
    'venue.timeZone', 
    'division.id', 
    'division.abbreviation', 
    'division.link', 
    'conference.id', 
    'conference.link', 
    'franchise.franchiseId', 
    'franchise.teamName', 
    'franchise.link'
]
df = df.drop(dropped_columns)
df

Unnamed: 0,0
id,10
name,Toronto Maple Leafs
abbreviation,TOR
locationName,Toronto
firstYearOfPlay,1917
officialSiteUrl,http://www.mapleleafs.com/
venue.name,Scotiabank Arena
division.name,Atlantic
division.nameShort,ATL
conference.name,Eastern


### It will be easier to see full list and other blocks of code horizontally so we once again transpose the dataframe.

In [13]:
df = df.transpose()
df

Unnamed: 0,id,name,abbreviation,locationName,firstYearOfPlay,officialSiteUrl,venue.name,division.name,division.nameShort,conference.name
0,10,Toronto Maple Leafs,TOR,Toronto,1917,http://www.mapleleafs.com/,Scotiabank Arena,Atlantic,ATL,Eastern


### Let's rename these columns to something more meaningful.

In [14]:
renamed_columns = {
    'id': 'apiID', 
    'locationName': 'city', 
    'officialSiteUrl': 'websiteURL', 
    'venue.name': 'venue', 
    'division.name': 'divison', 
    'division.nameShort': 'divisionAbbr', 
    'conference.name':'conference', 
}
df = df.rename(columns=renamed_columns)
df

Unnamed: 0,apiID,name,abbreviation,city,firstYearOfPlay,websiteURL,venue,divison,divisionAbbr,conference
0,10,Toronto Maple Leafs,TOR,Toronto,1917,http://www.mapleleafs.com/,Scotiabank Arena,Atlantic,ATL,Eastern


### Let's check if the csv shows up as we want to in the format it will be inserted into the database in

In [15]:
payload = df.to_dict(orient='records')
payload

[{'apiID': 10,
  'name': 'Toronto Maple Leafs',
  'abbreviation': 'TOR',
  'city': 'Toronto',
  'firstYearOfPlay': 1917,
  'websiteURL': 'http://www.mapleleafs.com/',
  'venue': 'Scotiabank Arena',
  'divison': 'Atlantic',
  'divisionAbbr': 'ATL',
  'conference': 'Eastern'}]

### Lets finally export the cleansed dataframe into a csv file

In [16]:
title = '../cleaned_csv/teamFranchiseDetailsCleaned.csv'
df.to_csv(title, index=False)

In [24]:
# how to get csv back into dataframe

a = pd.read_csv('../cleaned_csv/teamFranchiseDetailsCleaned.csv')
b = a.to_dict(orient='list')
c = pd.DataFrame.from_dict(b, orient='columns')
c

Unnamed: 0,apiID,name,abbreviation,city,firstYearOfPlay,websiteURL,venue,divison,divisionAbbr,conference
0,10,Toronto Maple Leafs,TOR,Toronto,1917,http://www.mapleleafs.com/,Scotiabank Arena,Atlantic,ATL,Eastern
