In [2]:
import pandas as pd
import datetime

In [3]:
#set to see more columns
pd.set_option('display.max_colwidth', 10)

In [4]:
#read csv file
jeopardy = pd.read_csv('jeopardy.csv')

In [5]:
#jeopardy DataFrame before pandas manipulating
print(jeopardy.head())

   Show Number   Air Date      Round   Category  Value   Question     Answer
0       4680    2004-1...  Jeopardy!    HISTORY   $200  For th...  Copern...
1       4680    2004-1...  Jeopardy!  ESPN's...   $200  No. 2:...  Jim Th...
2       4680    2004-1...  Jeopardy!  EVERYB...   $200  The ci...    Arizona
3       4680    2004-1...  Jeopardy!  THE CO...   $200  In 196...  McDona...
4       4680    2004-1...  Jeopardy!  EPITAP...   $200  Signer...  John A...


In [6]:
#rename columns
jeopardy.rename(columns={
  ' Air Date': 'Air Date',
  ' Round': 'Round',
  ' Category': 'Category',
  ' Value': 'Value',
  ' Question': 'Question',
  ' Answer': 'Answer'
},inplace=True)

In [7]:
#function that filters the dataset for questions that contains all of the words in a list of words
def data_filter(data, words):
  filter = lambda x: all(word.lower() in x.lower() for word in words)
  return data.loc[data['Question'].apply(filter)]

In [8]:
#adding new column with a float value
jeopardy['Float Value'] = jeopardy['Value'].apply(lambda x: float(x.strip('$').replace(',','')) if x!= "None" else 0)

In [9]:
#filtering for words: "King" and "England"
filtered = data_filter(jeopardy, ["King", "England"])
print(filtered)

        Show Number   Air Date      Round   Category   Value   Question  \
4953         3003    1997-0...  Double...  "PH"UN...    $200  Both E...   
6337         3517    1999-1...  Double...        Y1K    $800  In ret...   
9191         3907    2001-0...  Double...  WON TH...    $800  This k...   
11710        2903    1997-0...  Double...  BRITIS...    $600  This S...   
13454        4726    2005-0...  Jeopardy!  A NUMB...   $1000  It's t...   
...           ...          ...        ...        ...     ...        ...   
208295       4621    2004-1...  Jeopardy!  THE VI...    $600  In 106...   
208742       4863    2005-1...  Double...  BEFORE...  $3,000  Dutch-...   
213870       5856    2010-0...  Double...     URANUS   $1600  In 178...   
216021       1881    1992-1...  Double...  HISTOR...   $1000  His ni...   
216789       5070    2006-0...  Double...  ANCIEN...   $1200  This k...   

           Answer  Float Value  
4953    Philat...      200.0    
6337     Ethelred      800.0    


In [10]:
#filtering for an average difficulty
print(filtered.groupby('Question')['Float Value'].mean())

Question
"Carolus Stuardus" by Andreas Gryphius is about this executed king of England                                                                                                                                                                                                                                                                                                              1200.0
"More butcher than king in England" is seen as this leader born in 1599, 33 years after Nostradamus died                                                                                                                                                                                                                                                                                   2000.0
(<a href="http://www.j-archive.com/media/2000-03-02_J_22.jpg" target="_blank">Alex Trebek reads from England.</a>)  In 1214 barons fed up with King John met at the abbey whose ruins we see here; the result a year later 

In [11]:
#function that returns the count of the unique answers to all of the questions in a dataset
def unique_answers(data):
  return data.groupby('Answer').nunique()

In [12]:
print(unique_answers(jeopardy))

            Show Number  Air Date  Round  Category  Value  Question  \
Answer                                                                
 Hamlet             1           1      1         1      1         1   
 Les Mi...          1           1      1         1      1         1   
 Nosferatu          1           1      1         1      1         1   
 She Lo...          1           1      1         1      1         1   
 Sleepl...          1           1      1         1      1         1   
...               ...         ...    ...       ...    ...       ...   
étoufée             2           2      2         2      2         2   
études              1           1      1         1      1         1   
été                 1           1      1         1      1         1   
über                1           1      1         1      1         1   
“one gi...          1           1      1         1      1         1   

            Float Value  
Answer                   
 Hamlet             1   

In [13]:
#adding new column year and decades
jeopardy['Year'] = pd.DatetimeIndex(jeopardy['Air Date']).year
jeopardy['Decades'] = jeopardy['Year'].apply(lambda x: '2000s' if x>=2000 else '90s')

In [14]:
#filtering for a word in question by decades
by_decades_filtered = data_filter(jeopardy,['Computer'])

In [15]:
#investinge how many questions from the 90s use the word "Computer" compared to questions from the 2000s
print(by_decades_filtered.groupby('Decades').Category.count())

Decades
2000s    327
90s      104
Name: Category, dtype: int64
