In [2]:
import pandas as pd
# Never truncate long cell text (e.g. full question strings) when a frame is printed.
pd.set_option('display.max_colwidth', None)

#### Make dataframe

In [3]:
# Load the full dataset; the relative path expects jeopardy.csv in the working directory.
jeopardy = pd.read_csv('jeopardy.csv')

#### Print specific column

In [4]:
# 'Show Number' is the one raw header without a leading space; the others need the rename below.
print(jeopardy['Show Number'])

0         4680
1         4680
2         4680
3         4680
4         4680
          ... 
216925    4999
216926    4999
216927    4999
216928    4999
216929    4999
Name: Show Number, Length: 216930, dtype: int64


#### Rename columns to remove blank spaces

In [5]:
# Normalise the raw CSV headers: drop the leading blank most of them carry
# and use underscores instead of inner spaces.
jeopardy.rename(
    columns={
        'Show Number': 'Show_number',
        ' Air Date': 'Air_date',
        ' Round': 'Round',
        ' Category': 'Category',
        ' Value': 'Value',
        ' Question': 'Question',
        ' Answer': 'Answer',
    },
    inplace=True,
)

In [22]:
# Preview the first rows to confirm the renamed headers are in place.
print(jeopardy.head())

   Show_number    Air_date      Round                         Category Value  \
0         4680  2004-12-31  Jeopardy!                          HISTORY  $200   
1         4680  2004-12-31  Jeopardy!  ESPN's TOP 10 ALL-TIME ATHLETES  $200   
2         4680  2004-12-31  Jeopardy!      EVERYBODY TALKS ABOUT IT...  $200   
3         4680  2004-12-31  Jeopardy!                 THE COMPANY LINE  $200   
4         4680  2004-12-31  Jeopardy!              EPITAPHS & TRIBUTES  $200   

                                                                                                      Question  \
0             For the last 8 years of his life, Galileo was under house arrest for espousing this man's theory   
1  No. 2: 1912 Olympian; football star at Carlisle Indian School; 6 MLB seasons with the Reds, Giants & Braves   
2                     The city of Yuma in this state has a record average of 4,055 hours of sunshine each year   
3                         In 1963, live on "The Art Linkletter 

In [7]:
# For a DataFrame, keys() returns the column labels; check them after the rename.
print(jeopardy.keys())

Index(['Show_number', 'Air_date', 'Round', 'Category', 'Value', 'Question',
       'Answer'],
      dtype='object')


#### Filter dataset by list of words

In [8]:

def filter_data(data, words):
    """Return the rows of ``data`` whose Question contains every word in ``words``.

    Matching is a case-insensitive *substring* test, so "King" also matches
    "Viking". Questions that are not text (NaN/None) never match.

    Args:
        data: DataFrame with a "Question" column.
        words: Iterable of strings that must all occur in the question.

    Returns:
        The matching rows of ``data``, with their original index labels.
    """
    # Lowercase the search terms once instead of once per row; materialising
    # them also lets ``words`` be a one-shot iterable such as a generator.
    needles = [word.lower() for word in words]

    def has_all_words(question):
        # Missing questions would otherwise crash on ``.lower()``.
        if not isinstance(question, str):
            return False
        text = question.lower()
        return all(needle in text for needle in needles)

    # astype(bool) keeps the mask boolean even when the column is empty.
    mask = data["Question"].apply(has_all_words).astype(bool)
    return data.loc[mask]


In [9]:
# Sanity-check filter_data: keep questions containing both "king" and "england".
# Matching is a case-insensitive substring test, so e.g. "Viking" satisfies "King".
filtered = filter_data(jeopardy, ["King", "England"])
print(filtered["Question"])


4953                    Both England's King George V & FDR put their stamp of approval on this "King of Hobbies"
6337      In retaliation for Viking raids, this "Unready" king of England attacks Norse areas of the Isle of Man
9191                    This king of England beat the odds to trounce the French in the 1415 Battle of Agincourt
11710               This Scotsman, the first Stuart king of England, was called "The Wisest Fool in Christendom"
13454                                       It's the number that followed the last king of England named William
                                                           ...                                                  
208295        In 1066 this great-great grandson of Rollo made what some call the last Viking invasion of England
208742                      Dutch-born king who ruled England jointly with Mary II & is a tasty New Zealand fish
213870                In 1781 William Herschel discovered Uranus & initially named it after this

#### Calculate the mean prize value of the questions

In [10]:
# Peek at the first Value entry with everything up to the '$' removed, to see the raw format.
print((jeopardy['Value'][0]).split('$')[-1])

200


In [11]:
# Convert "$1,200"-style strings to floats. The literal 'None' (questions with
# no dollar value) and missing cells become NaN; any other unparsable text
# still raises, so bad data is not silently hidden.
jeopardy['Float_Values'] = pd.to_numeric(
    jeopardy['Value']
    .where(jeopardy['Value'] != 'None')   # 'None' -> NaN
    .str.replace('$', '', regex=False)    # strip the symbol itself, not "whatever is first"
    .str.replace(',', '', regex=False)    # drop thousands separators
).astype(float)

    


In [13]:
# Average dollar value over every question that has one (NaN rows are skipped).
mean = round(jeopardy['Float_Values'].mean(skipna=True), 2)
print(mean)

# Same statistic restricted to the questions whose text contains "king" (any case).
filtered_2 = filter_data(jeopardy, ["King"])
mean_of_filtered_2 = round(filtered_2['Float_Values'].mean(skipna=True), 1)
print(mean_of_filtered_2)

752.6
785.7


#### Count of unique answers

In [14]:
def get_answer_counts(data):
    """Tally how often each distinct answer occurs in ``data``.

    Args:
        data: DataFrame with an "Answer" column.

    Returns:
        Series indexed by answer, holding its number of occurrences,
        as produced by ``Series.value_counts`` (most frequent first).
    """
    answers = data["Answer"]
    return answers.value_counts()

In [24]:
# Sanity-check get_answer_counts on `filtered`, the King/England subset built in an earlier cell.
print(get_answer_counts(filtered))

William the Conqueror       6
Wessex                      3
Richard the Lionhearted     3
Henry VIII                  3
George III                  3
                           ..
The Magna Carta             1
King Hussein                1
Charles                     1
(Sir Edward) Elgar          1
William of Orange roughy    1
Name: Answer, Length: 114, dtype: int64


#### Compare how many times a word is listed in a question across different decades

In [16]:
def compare_90s_00s(word, data=None):
    """Count the questions matching ``word`` that aired in the 1990s and the 2000s.

    ``word`` is handed to ``Series.str.contains`` with its defaults, so it is
    a case-sensitive pattern. Rows whose Question or Air_date is missing are
    counted as non-matching instead of breaking the boolean mask.

    Args:
        word: Text (pattern) to look for in the Question column.
        data: DataFrame with "Air_date" (strings starting with the year) and
            "Question" columns. Defaults to the notebook-level ``jeopardy``.

    Returns:
        Tuple ``('count_90s: ', <int>, 'count_00s: ', <int>)``.
    """
    if data is None:
        data = jeopardy

    # Evaluate the text match once and reuse it for both decades.
    mentions_word = data['Question'].str.contains(word, na=False)
    air_date = data['Air_date']

    def count_decade(year_prefix):
        # '199' selects 1990-1999 and '200' selects 2000-2009.
        in_decade = air_date.str.startswith(year_prefix, na=False)
        return int((in_decade & mentions_word).sum())

    return ('count_90s: ', count_decade('199'), 'count_00s: ', count_decade('200'))


    
    

In [17]:
# Example: questions containing 'computer' (case-sensitive) in the 1990s vs the 2000s.
print(compare_90s_00s('computer'))

('count_90s: ', 90, 'count_00s: ', 244)


#### Compare categories in rounds

In [25]:
# List the distinct round labels so later comparisons can use their exact spelling.
print(jeopardy['Round'].unique())

['Jeopardy!' 'Double Jeopardy!' 'Final Jeopardy!' 'Tiebreaker']


In [19]:
# Lowercase one category title to see the form used for case-insensitive comparison.
print(jeopardy['Category'][1].lower())

espn's top 10 all-time athletes


In [20]:
def compare_cat(word, data=None):
    """Count, per round, the questions whose category equals ``word``.

    The category comparison is case-insensitive and must match the whole
    category title. Rows with a missing category never match.

    Args:
        word: Category title to look for.
        data: DataFrame with "Round" and "Category" columns. Defaults to the
            notebook-level ``jeopardy``.

    Returns:
        Tuple of ints ``(jeopardy, double_jeopardy, final_jeopardy, tiebreaker)``.
    """
    if data is None:
        data = jeopardy

    # Vectorised comparison: no positional counter, so it works for any index.
    in_category = data['Category'].str.lower() == word.lower()
    per_round = data.loc[in_category, 'Round'].value_counts()

    # The dataset spells the last round 'Tiebreaker' (no trailing '!').
    round_labels = ('Jeopardy!', 'Double Jeopardy!', 'Final Jeopardy!', 'Tiebreaker')
    return tuple(int(per_round.get(label, 0)) for label in round_labels)
                
        

In [21]:
# Example: 'LITERATURE' counts in the order (Jeopardy!, Double Jeopardy!, Final Jeopardy!, Tiebreaker).
print(compare_cat('LITERATURE'))

(105, 381, 10, 0)
