## Import Required Libraries

In [148]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns

# Apply seaborn's default theme to every matplotlib figure drawn after this point.
sns.set()

## Load Data

In [149]:
# Never truncate cell text when rendering frames: the clue text is long and
# would otherwise be cut off with an ellipsis.
pd.set_option("display.max_colwidth", None)

# Raw clue table, exactly as stored on disk (headers are cleaned up further down).
df = pd.read_csv("jeopardy.csv")
df

Unnamed: 0,Show Number,Air Date,Round,Category,Value,Question,Answer
0,4680,2004-12-31,Jeopardy!,HISTORY,$200,"For the last 8 years of his life, Galileo was under house arrest for espousing this man's theory",Copernicus
1,4680,2004-12-31,Jeopardy!,ESPN's TOP 10 ALL-TIME ATHLETES,$200,"No. 2: 1912 Olympian; football star at Carlisle Indian School; 6 MLB seasons with the Reds, Giants & Braves",Jim Thorpe
2,4680,2004-12-31,Jeopardy!,EVERYBODY TALKS ABOUT IT...,$200,"The city of Yuma in this state has a record average of 4,055 hours of sunshine each year",Arizona
3,4680,2004-12-31,Jeopardy!,THE COMPANY LINE,$200,"In 1963, live on ""The Art Linkletter Show"", this company served its billionth burger",McDonald's
4,4680,2004-12-31,Jeopardy!,EPITAPHS & TRIBUTES,$200,"Signer of the Dec. of Indep., framer of the Constitution of Mass., second President of the United States",John Adams
...,...,...,...,...,...,...,...
216925,4999,2006-05-11,Double Jeopardy!,RIDDLE ME THIS,$2000,This Puccini opera turns on the solution to 3 riddles posed by the heroine,Turandot
216926,4999,2006-05-11,Double Jeopardy!,"""T"" BIRDS",$2000,"In North America this term is properly applied to only 4 species that are crested, including the tufted",a titmouse
216927,4999,2006-05-11,Double Jeopardy!,AUTHORS IN THEIR YOUTH,$2000,"In Penny Lane, where this ""Hellraiser"" grew up, the barber shaves another customer--then flays him alive!",Clive Barker
216928,4999,2006-05-11,Double Jeopardy!,QUOTATIONS,$2000,"From Ft. Sill, Okla. he made the plea, Arizona is my land, my home, my father's land, to which I now ask to... return""",Geronimo


## Investigate the contents of the data

In [150]:
# include="all" adds the text columns (count / unique / top / freq) to the
# summary instead of reporting the numeric column only.
df.describe(include = "all")

Unnamed: 0,Show Number,Air Date,Round,Category,Value,Question,Answer
count,216930.0,216930,216930,216930,216930,216930,216928
unique,,3640,4,27995,150,216124,88268
top,,1997-05-19,Jeopardy!,BEFORE & AFTER,$400,[audio clue],China
freq,,62,107384,547,42244,17,216
mean,4264.238519,,,,,,
std,1386.296335,,,,,,
min,1.0,,,,,,
25%,3349.0,,,,,,
50%,4490.0,,,,,,
75%,5393.0,,,,,,


In [151]:
# Number of missing entries in each column.
df.isna().sum()

Show Number    0
 Air Date      0
 Round         0
 Category      0
 Value         0
 Question      0
 Answer        2
dtype: int64

In [152]:
# Show the raw header names as a plain list so stray whitespace is visible.
list(df.columns)

['Show Number',
 ' Air Date',
 ' Round',
 ' Category',
 ' Value',
 ' Question',
 ' Answer']

#### Remove the leading spaces from the column names and convert them to snake_case

In [153]:
# Map each raw header (most carry a leading space) to a snake_case name.
df.rename(
    columns={
        "Show Number": "show_number",
        " Air Date": "air_date",
        " Round": "round",
        " Category": "category",
        " Value": "value",
        " Question": "question",
        " Answer": "answer",
    },
    inplace=True,
)
list(df.columns)

['show_number', 'air_date', 'round', 'category', 'value', 'question', 'answer']

## Create a function to find questions that contain any word from a list

In [154]:
def find_questions_about(word_list, csv_path="jeopardy.csv"):
    """Return the clues whose question text contains any of the given words.

    Matching is a case-insensitive, literal substring test (no regular
    expressions), so "King" also matches "kingdom".

    Parameters
    ----------
    word_list : iterable of str
        Words or phrases to look for in the question text.
    csv_path : str, default "jeopardy.csv"
        CSV file to search. Its headers must be the raw ones
        ("Show Number", " Air Date", ..., " Question", " Answer").

    Returns
    -------
    pandas.DataFrame
        Matching rows with snake_case column names plus an ``index`` column
        holding each row's position in the CSV. Rows are grouped by word, in
        ``word_list`` order; a row that matches several words appears once
        per matching word. An empty ``word_list`` yields an empty frame with
        the same columns.
    """
    main_df = pd.read_csv(csv_path)
    question_text = main_df[" Question"]

    # DataFrame.append was removed in pandas 2.0, and growing a frame inside a
    # loop is quadratic anyway: collect the per-word slices and concatenate once.
    # na=False treats a missing question as "no match" instead of producing a
    # NaN mask that cannot be used for boolean indexing.
    per_word_matches = [
        main_df[question_text.str.contains(word, case=False, regex=False, na=False)]
        for word in word_list
    ]

    # pd.concat raises on an empty list, so fall back to a zero-row slice that
    # still carries the column layout.
    if per_word_matches:
        new_df = pd.concat(per_word_matches)
    else:
        new_df = main_df.iloc[0:0]

    new_df = new_df.rename(
        columns={
            "Show Number": "show_number",
            " Air Date": "air_date",
            " Round": "round",
            " Category": "category",
            " Value": "value",
            " Question": "question",
            " Answer": "answer",
        }
    )
    # reset_index() keeps the original row labels in a new "index" column.
    return new_df.reset_index()

In [155]:
# Try the helper on two sample words; each word is matched as a
# case-insensitive substring of the question text.
test_df = find_questions_about(word_list=["King", "England"])
test_df

Unnamed: 0,index,show_number,air_date,round,category,value,question,answer
0,34,4680,2004-12-31,Double Jeopardy!,"""X""s & ""O""s",$400,Around 100 A.D. Tacitus wrote a book on how this art of persuasive speaking had declined since Cicero,oratory
1,40,4680,2004-12-31,Double Jeopardy!,DR. SEUSS AT THE MULTIPLEX,$1200,"<a href=""http://www.j-archive.com/media/2004-12-31_DJ_26.mp3"">Ripped from today's headlines, he was a turtle king gone mad; Mack was the one good turtle who'd bring him down</a>",Yertle
2,50,4680,2004-12-31,Double Jeopardy!,DR. SEUSS AT THE MULTIPLEX,$2000,"<a href=""http://www.j-archive.com/media/2004-12-31_DJ_24.mp3"">""500 Hats""... 500 ways to die. On July 4th, this young boy will defy a king... & become a legend</a>",Bartholomew Cubbins
3,56,5957,2010-07-06,Jeopardy!,"GEOGRAPHY ""E""",$200,It's the largest kingdom in the United Kingdom,England
4,72,5957,2010-07-06,Jeopardy!,LET'S BOUNCE,$600,"In this kid's game, you bounce a small rubber ball while picking up 6-pronged metal objects",jacks
...,...,...,...,...,...,...,...,...
8523,215892,6003,2010-10-20,Double Jeopardy!,STATE / COLLEGES,$800,New England College & Daniel Webster College,New Hampshire
8524,216021,1881,1992-11-09,Double Jeopardy!,HISTORIC NAMES,$1000,"His nickname was ""Bertie"", but he used this name & number when he became king of England in 1901",Edward VII
8525,216035,4364,2003-07-17,Jeopardy!,CONTEMPORARIES,$400,In 1699 Capt. Kidd was out pirating for loot & this scientist was made master of the Mint in England,Sir Isaac Newton
8526,216550,3038,1997-11-12,Double Jeopardy!,COLONISTS,$600,"You have to ""owe it"" to Oglethorpe; he wanted GA. to be a haven for men imprisoned for this crime in England",Having a debt


## Average clue value

In [156]:
# Turn the raw "$1,200"-style strings into floats in one vectorised pass.
# - astype(str) makes the cell safe to re-run: once the column is numeric, the
#   original per-element x.replace(...) calls fail on floats.
# - "$" and "," are stripped; anything still non-numeric (the literal "None"
#   used for clues without a dollar value) is coerced to NaN.
df['value'] = pd.to_numeric(
    df['value'].astype(str).str.replace(r"[$,]", "", regex=True),
    errors='coerce',
)
df['value']

0          200.0
1          200.0
2          200.0
3          200.0
4          200.0
           ...  
216925    2000.0
216926    2000.0
216927    2000.0
216928    2000.0
216929       NaN
Name: value, Length: 216930, dtype: float64

In [163]:
# Fill each missing entry with the value from the row above.
# fillna(method="ffill") is deprecated since pandas 2.1; ffill() is the
# supported spelling of the same operation.
# NOTE(review): this fills every column, so the two rows with a missing answer
# also inherit the previous row's answer. Restrict it to df['value'] if only
# the dollar values are meant to be filled.
df = df.ffill(axis=0)
df['value']

0          200.0
1          200.0
2          200.0
3          200.0
4          200.0
           ...  
216925    2000.0
216926    2000.0
216927    2000.0
216928    2000.0
216929    2000.0
Name: value, Length: 216930, dtype: float64

In [164]:
# Mean of the numeric `value` column.
# NOTE(review): after the forward-fill cell above, rows whose value was missing
# contribute the previous row's value to this mean - confirm that is intended.
avg_value = df['value'].mean()
avg_value

767.360899829438

## Create a function to count the number of unique answers to each question

In [228]:
def count_unique_answers(csv_path="jeopardy.csv"):
    """Count how many distinct answers each question text has.

    Parameters
    ----------
    csv_path : str, default "jeopardy.csv"
        CSV file to read. Its headers must include the raw " Question" and
        " Answer" columns (with the leading space).

    Returns
    -------
    pandas.DataFrame
        One row per distinct question, in order of first appearance in the
        file, with columns ``question`` and ``number_of_unique_answers``.
        Missing answers are not counted, and rows with a missing question
        are left out.
    """
    main_df = pd.read_csv(csv_path)

    # Take the question labels and the counts from the same groupby so they
    # cannot drift apart. Previously the labels came from .unique()
    # (first-appearance order) while the counts came from a groupby sorted by
    # question text, so counts were attached to the wrong questions.
    # sort=False keeps first-appearance order; nunique() counts distinct
    # answers, where count() merely counted rows.
    answer_counts = (
        main_df.groupby(" Question", sort=False)[" Answer"]
        .nunique()
        .reset_index()
    )
    answer_counts.columns = ["question", "number_of_unique_answers"]
    return answer_counts

In [229]:
# Build the per-question answer-count table and display it. The function
# re-reads jeopardy.csv itself, so the cleaning applied to `df` above does not
# affect this result.
unique_answer_count = count_unique_answers()
unique_answer_count

Unnamed: 0,question,number_of_unique_answers
0,"For the last 8 years of his life, Galileo was under house arrest for espousing this man's theory",1
1,"No. 2: 1912 Olympian; football star at Carlisle Indian School; 6 MLB seasons with the Reds, Giants & Braves",1
2,"The city of Yuma in this state has a record average of 4,055 hours of sunshine each year",1
3,"In 1963, live on ""The Art Linkletter Show"", this company served its billionth burger",1
4,"Signer of the Dec. of Indep., framer of the Constitution of Mass., second President of the United States",1
...,...,...
216119,This Puccini opera turns on the solution to 3 riddles posed by the heroine,1
216120,"In North America this term is properly applied to only 4 species that are crested, including the tufted",1
216121,"In Penny Lane, where this ""Hellraiser"" grew up, the barber shaves another customer--then flays him alive!",1
216122,"From Ft. Sill, Okla. he made the plea, Arizona is my land, my home, my father's land, to which I now ask to... return""",1
