# Clean

In [1]:
import pandas as pd
# Import our modules
import numpy as np
import statsmodels as sm
import pylab as pl
from scipy import stats
# from sklearn.externals.six import StringIO
from sklearn import preprocessing
from sklearn import cluster, tree, decomposition
from sklearn.linear_model import LogisticRegression
import matplotlib.pyplot as plt
# import pydot
import seaborn as sns
import plotly.express as px

In [2]:
# Path of the Jeopardy! questions CSV, relative to the notebook's working directory
file_name: str = "jeopardy_questions.csv"

In [3]:
# Load the questions CSV into a DataFrame using pandas' default parsing options;
# column labels are taken verbatim from the file's header row.
j_questions = pd.read_csv(filepath_or_buffer=file_name)

In [4]:
# Inspect the column labels. Every label except 'Show Number' carries a leading
# space (e.g. ' Value', ' Round'), so later cells must include that space when
# selecting a column.
j_questions.columns

Index(['Show Number', ' Air Date', ' Round', ' Category', ' Value',
       ' Question', ' Answer'],
      dtype='object')

In [5]:
# Drop the first character of every ' Value' string, which removes the leading "$".
# NOTE: the strip is positional, not conditional -- a value without a "$" also loses
# its first character (presumably how a 'None' placeholder ends up as the 'one'
# that is filtered out later -- TODO confirm against the raw CSV), and re-running
# this cell trims one more character each time.
value_without_symbol = j_questions[' Value'].str.slice(start=1)
j_questions[' Value'] = value_without_symbol
j_questions.head()

Unnamed: 0,Show Number,Air Date,Round,Category,Value,Question,Answer
0,4680,2004-12-31,Jeopardy!,HISTORY,200,"For the last 8 years of his life, Galileo was ...",Copernicus
1,4680,2004-12-31,Jeopardy!,ESPN's TOP 10 ALL-TIME ATHLETES,200,No. 2: 1912 Olympian; football star at Carlisl...,Jim Thorpe
2,4680,2004-12-31,Jeopardy!,EVERYBODY TALKS ABOUT IT...,200,The city of Yuma in this state has a record av...,Arizona
3,4680,2004-12-31,Jeopardy!,THE COMPANY LINE,200,"In 1963, live on ""The Art Linkletter Show"", th...",McDonald's
4,4680,2004-12-31,Jeopardy!,EPITAPHS & TRIBUTES,200,"Signer of the Dec. of Indep., framer of the Co...",John Adams


In [6]:
# Drop the rows whose ' Value' is the string 'one'; they cannot be parsed as a
# number. (Presumably these were 'None' placeholders that lost their first
# character when the "$" was stripped above -- TODO confirm against the raw CSV.)
# .copy() turns the filtered result into an independent DataFrame, so the column
# assignments in the following cells do not raise SettingWithCopyWarning.
j_questions = j_questions.loc[j_questions[' Value'] != 'one'].copy()

In [7]:
# Remove every comma from the ' Value' strings (e.g. "1,000" -> "1000") so they
# can be converted to numbers. regex=True makes this a substring replacement
# rather than a whole-value match.
values_no_commas = j_questions[' Value'].replace(to_replace=',', value='', regex=True)
j_questions[' Value'] = values_no_commas

In [8]:
# Convert the cleaned ' Value' strings to 64-bit floats so they can be aggregated
j_questions[' Value'] = j_questions[' Value'].astype('float64')

In [14]:
# double_j: only the clues played in the 'Double Jeopardy!' round
is_double_round = j_questions[' Round'] == 'Double Jeopardy!'
double_j = j_questions[is_double_round]
double_j

Unnamed: 0,Show Number,Air Date,Round,Category,Value,Question,Answer
29,4680,2004-12-31,Double Jeopardy!,DR. SEUSS AT THE MULTIPLEX,400.0,"<a href=""http://www.j-archive.com/media/2004-1...",Horton
30,4680,2004-12-31,Double Jeopardy!,PRESIDENTIAL STATES OF BIRTH,400.0,California,Nixon
31,4680,2004-12-31,Double Jeopardy!,AIRLINE TRAVEL,400.0,It can be a place to leave your puppy when you...,a kennel
32,4680,2004-12-31,Double Jeopardy!,THAT OLD-TIME RELIGION,400.0,"He's considered the author of the Pentateuch, ...",Moses
33,4680,2004-12-31,Double Jeopardy!,MUSICAL TRAINS,400.0,Steven Tyler of this band lent his steamin' vo...,Aerosmith
...,...,...,...,...,...,...,...
216924,4999,2006-05-11,Double Jeopardy!,OFF-BROADWAY,2000.0,In 2006 the cast of this long-running hit emba...,Stomp
216925,4999,2006-05-11,Double Jeopardy!,RIDDLE ME THIS,2000.0,This Puccini opera turns on the solution to 3 ...,Turandot
216926,4999,2006-05-11,Double Jeopardy!,"""T"" BIRDS",2000.0,In North America this term is properly applied...,a titmouse
216927,4999,2006-05-11,Double Jeopardy!,AUTHORS IN THEIR YOUTH,2000.0,"In Penny Lane, where this ""Hellraiser"" grew up...",Clive Barker


In [15]:
# n_j: only the clues played in the regular 'Jeopardy!' round
is_first_round = j_questions[' Round'] == 'Jeopardy!'
n_j = j_questions[is_first_round]
n_j

Unnamed: 0,Show Number,Air Date,Round,Category,Value,Question,Answer
0,4680,2004-12-31,Jeopardy!,HISTORY,200.0,"For the last 8 years of his life, Galileo was ...",Copernicus
1,4680,2004-12-31,Jeopardy!,ESPN's TOP 10 ALL-TIME ATHLETES,200.0,No. 2: 1912 Olympian; football star at Carlisl...,Jim Thorpe
2,4680,2004-12-31,Jeopardy!,EVERYBODY TALKS ABOUT IT...,200.0,The city of Yuma in this state has a record av...,Arizona
3,4680,2004-12-31,Jeopardy!,THE COMPANY LINE,200.0,"In 1963, live on ""The Art Linkletter Show"", th...",McDonald's
4,4680,2004-12-31,Jeopardy!,EPITAPHS & TRIBUTES,200.0,"Signer of the Dec. of Indep., framer of the Co...",John Adams
...,...,...,...,...,...,...,...
216894,4999,2006-05-11,Jeopardy!,LOVE SONGS IN GERMAN,1000.0,"Joe Cocker: ""Sie Sind So Schon""","""You Are So Beautiful"""
216895,4999,2006-05-11,Jeopardy!,FIRST IN OUR HEARTS,1000.0,"Having no reeds, pipes or vibrating parts, thi...",(Laurens) Hammond
216896,4999,2006-05-11,Jeopardy!,IT'S NOT ALEX TREBEK,1000.0,"It's not me seen <a href=""http://www.j-archive...",Neville Chamberlain
216897,4999,2006-05-11,Jeopardy!,SCIENCE BRIEFS,1000.0,"The unit of magnetic flux density, abbreviated...",(Nikola) Tesla


# Descriptive Analysis
## Most common Categories?
## Average bet in double Jeopardy?

In [19]:
# How often each dollar value occurs in the Double Jeopardy! round,
# listed from most to least frequent
dj_value_counts = double_j[' Value'].value_counts()
dj_value_counts

400.0      21288
800.0      19856
2000.0     12585
1200.0     11602
1600.0     10952
           ...  
10400.0        1
1183.0         1
20.0           1
9500.0         1
22.0           1
Name:  Value, Length: 141, dtype: int64

In [16]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns of the Double Jeopardy! subset. Only the ' Value' column is of interest
# here; 'Show Number' is summarised too, but it looks like an episode identifier,
# so its statistics are presumably not meaningful.
double_j.describe()

Unnamed: 0,Show Number,Value
count,105912.0,105912.0
mean,4264.161644,1017.291742
std,1387.287091,755.440606
min,1.0,5.0
25%,3348.0,400.0
50%,4492.0,800.0
75%,5393.25,1500.0
max,6298.0,18000.0


In [27]:
# Number of clues per category in the regular Jeopardy! round,
# listed from most to least frequent
nj_category_counts = n_j[' Category'].value_counts()
nj_category_counts

POTPOURRI            255
STUPID ANSWERS       255
SPORTS               253
ANIMALS              233
AMERICAN HISTORY     227
                    ... 
ATLANTIS               1
ONE MAN, ONE BOAT      1
UP YOUR "ALLEY"        1
POE                    1
HAND SIGNALS           1
Name:  Category, Length: 15155, dtype: int64

In [44]:
# Number of clues per category in the Double Jeopardy! round,
# listed from most to least frequent
dj_category_counts = double_j[' Category'].value_counts()
dj_category_counts

BEFORE & AFTER                  450
LITERATURE                      381
SCIENCE                         296
WORLD GEOGRAPHY                 254
OPERA                           250
                               ... 
ATHLETES' COUNTRIES OF BIRTH      1
CAPITAL CITY OF BIRTH             1
NEWFOUNDLAND                      1
AROUND THE LAB WITH LEX           1
MIDWAY ISLAND                     1
Name:  Category, Length: 14576, dtype: int64

# Prescriptive Analysis

## Subjects to focus on when studying
* Potpourri
* Literature
* Science
* World Geography
* Sports
* Animals
* American History

## Thoughts on double jeopardy bets

### The guest should consider the context.
### However, the average bet is around a thousand dollars.
### The guest should use that as a way to gauge their performance against other guests.
### One contestant used Double Jeopardy as a way to create cash distance from the others.
### His average was around 9000 dollars.