In [120]:
import pandas as pd
data = pd.read_csv("academy_awards.csv", encoding = "ISO-8859-1")

In [121]:
print(data.head())

          Year               Category          Nominee  \
0  2010 (83rd)  Actor -- Leading Role    Javier Bardem   
1  2010 (83rd)  Actor -- Leading Role     Jeff Bridges   
2  2010 (83rd)  Actor -- Leading Role  Jesse Eisenberg   
3  2010 (83rd)  Actor -- Leading Role      Colin Firth   
4  2010 (83rd)  Actor -- Leading Role     James Franco   

                          Additional Info Won? Unnamed: 5 Unnamed: 6  \
0                      Biutiful {'Uxbal'}   NO        NaN        NaN   
1           True Grit {'Rooster Cogburn'}   NO        NaN        NaN   
2  The Social Network {'Mark Zuckerberg'}   NO        NaN        NaN   
3    The King's Speech {'King George VI'}  YES        NaN        NaN   
4              127 Hours {'Aron Ralston'}   NO        NaN        NaN   

  Unnamed: 7 Unnamed: 8 Unnamed: 9 Unnamed: 10  
0        NaN        NaN        NaN         NaN  
1        NaN        NaN        NaN         NaN  
2        NaN        NaN        NaN         NaN  
3        NaN        Na

In [122]:
print(data["Unnamed: 10"].value_counts())

*    1
Name: Unnamed: 10, dtype: int64


In [123]:
print(data["Additional Info"].value_counts())

Metro-Goldwyn-Mayer                                                                                                                                                                  60
Walt Disney, Producer                                                                                                                                                                57
Warner Bros.                                                                                                                                                                         42
John Williams                                                                                                                                                                        37
France                                                                                                                                                                               35
Alfred Newman                                                                   

In [124]:
# Formatting the year to first 4 digits, then converting to int
data["Year"] = data["Year"].str[0:4].astype('int64')
data.head()

Unnamed: 0,Year,Category,Nominee,Additional Info,Won?,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10
0,2010,Actor -- Leading Role,Javier Bardem,Biutiful {'Uxbal'},NO,,,,,,
1,2010,Actor -- Leading Role,Jeff Bridges,True Grit {'Rooster Cogburn'},NO,,,,,,
2,2010,Actor -- Leading Role,Jesse Eisenberg,The Social Network {'Mark Zuckerberg'},NO,,,,,,
3,2010,Actor -- Leading Role,Colin Firth,The King's Speech {'King George VI'},YES,,,,,,
4,2010,Actor -- Leading Role,James Franco,127 Hours {'Aron Ralston'},NO,,,,,,


In [125]:
# Select only rows where year > 2000 and is of one of the four award categories listed

later_than_2000 = data.loc[data["Year"] > 2000]
award_categories = ["Actor -- Leading Role", "Actor -- Supporting Role", "Actress -- Leading Role", "Actress -- Supporting Role"]
nominations = later_than_2000.loc[later_than_2000["Category"].isin(award_categories)]
nominations.head()

Unnamed: 0,Year,Category,Nominee,Additional Info,Won?,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10
0,2010,Actor -- Leading Role,Javier Bardem,Biutiful {'Uxbal'},NO,,,,,,
1,2010,Actor -- Leading Role,Jeff Bridges,True Grit {'Rooster Cogburn'},NO,,,,,,
2,2010,Actor -- Leading Role,Jesse Eisenberg,The Social Network {'Mark Zuckerberg'},NO,,,,,,
3,2010,Actor -- Leading Role,Colin Firth,The King's Speech {'King George VI'},YES,,,,,,
4,2010,Actor -- Leading Role,James Franco,127 Hours {'Aron Ralston'},NO,,,,,,


In [126]:
# Changing Won? column to 1 and 0, renaming it to "Won" and dropping useless columns

nominations["Won?"] = nominations["Won?"].astype(str)
replacements = { "NO": 0, "YES": 1 }
nominations["Won?"] = nominations["Won?"].map(replacements)
nominations["Won"] = nominations["Won?"]
drop_cols = ["Won?","Unnamed: 5", "Unnamed: 6","Unnamed: 7", "Unnamed: 8", "Unnamed: 9", "Unnamed: 10"]
final_nominations = nominations.drop(drop_cols, axis=1)
final_nominations.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,Year,Category,Nominee,Additional Info,Won
0,2010,Actor -- Leading Role,Javier Bardem,Biutiful {'Uxbal'},0
1,2010,Actor -- Leading Role,Jeff Bridges,True Grit {'Rooster Cogburn'},0
2,2010,Actor -- Leading Role,Jesse Eisenberg,The Social Network {'Mark Zuckerberg'},0
3,2010,Actor -- Leading Role,Colin Firth,The King's Speech {'King George VI'},1
4,2010,Actor -- Leading Role,James Franco,127 Hours {'Aron Ralston'},0


In [127]:
# change the additional info column into two columns showing movie and char played
additional_info_one = final_nominations["Additional Info"].str.rstrip(("'}"))
additional_info_two = additional_info_one.str.split(" {'")
movie_names = additional_info_two.str[0]
characters = additional_info_two.str[1]
final_nominations["Movie"] = movie_names
final_nominations["Character"] = characters
final_nominations.head()

Unnamed: 0,Year,Category,Nominee,Additional Info,Won,Movie,Character
0,2010,Actor -- Leading Role,Javier Bardem,Biutiful {'Uxbal'},0,Biutiful,Uxbal
1,2010,Actor -- Leading Role,Jeff Bridges,True Grit {'Rooster Cogburn'},0,True Grit,Rooster Cogburn
2,2010,Actor -- Leading Role,Jesse Eisenberg,The Social Network {'Mark Zuckerberg'},0,The Social Network,Mark Zuckerberg
3,2010,Actor -- Leading Role,Colin Firth,The King's Speech {'King George VI'},1,The King's Speech,King George VI
4,2010,Actor -- Leading Role,James Franco,127 Hours {'Aron Ralston'},0,127 Hours,Aron Ralston


In [128]:
import sqlite3
conn = sqlite3.connect("nominations.db")
final_nominations.to_sql("nominations", conn, index = False)

  chunksize=chunksize, dtype=dtype)


In [133]:
conn = sqlite3.connect("nominations.db")
conn.execute("pragma table_info()")

OperationalError: near ")": syntax error