In [1]:
# Import the modules
import pandas as pd
from pathlib import Path
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import balanced_accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import pickle
import sqlite3


## 1. Split the Data into Training and Testing Sets

### 1.1 Read the preprocessed Pokemon data from the `Resources` folder into a Pandas DataFrame.

In [2]:

# Open the SQLite database file from Python
cnx = sqlite3.connect('pokemon.sqlite')

# Query against sqlite_master that returns the name of every table
sql_query = """SELECT name FROM sqlite_master
WHERE type='table';"""

# Run the query through a cursor created on the connection
cursor = cnx.cursor()
cursor.execute(sql_query)

# Collect the rows the query returned, then print them under a heading
table_names = cursor.fetchall()
print("List of tables:")
print(table_names)


List of tables:
[('bigfoot',)]


In [3]:
# Load every row of the bigfoot table into a pandas DataFrame
bigfoot_query = "SELECT * FROM bigfoot"
df = pd.read_sql_query(bigfoot_query, cnx)

# Display the DataFrame
df

Unnamed: 0,Number,County,State,Latitude,Longitude,Classification,Date,Season,Temperature,Humidity,Cloud_cover,Precip_intensity,Visibility,Pressure,Wind_speed,Observed
0,55269,Sullivan County,New Hampshire,43.41549,-72.33093,Class A,2016-06-07,Summer,64,0.79,0.61,0.0010,9.70,998.87,0.49,I was on my way to Claremont from Lebanon on R...
1,49883,Warren County,New Jersey,40.89452,-74.79077,Class B,2015-10-02,Summer,46,0.87,0.93,0.0092,9.16,1022.92,2.87,It was August of 1977 and I had missed my ride...
2,26830,Washington County,Ohio,39.42635,-81.37085,Class B,2009-10-31,Fall,51,0.77,0.81,0.0158,1.97,1011.48,3.94,After getting online to your site we decided t...
3,6643,Washington County,Rhode Island,41.43200,-71.65183,Class A,1978-07-15,Summer,65,0.88,0.80,0.0285,5.71,1014.70,5.47,I've told very few people this story for fear ...
4,50297,Gallatin County,Montana,45.79220,-111.36370,Class B,2015-11-26,Summer,12,0.65,0.08,0.0002,10.00,1037.98,0.40,I used to own a horse boarding facility which ...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
710,8715,Grant County,Kentucky,38.52800,-84.58800,Class A,2005-02-02,Fall,33,0.81,0.39,0.0019,6.27,1026.43,2.60,I saw something along Interstate 75 South in K...
711,1850,Carter County,Kentucky,38.33885,-82.89059,Class B,1983-01-15,Winter,27,0.72,1.00,0.0059,4.56,1015.14,10.35,"This was about 1980-1983, I lived in Carter Co..."
712,2363,Ballard County,Kentucky,36.93720,-88.99490,Class A,1993-01-20,Winter,49,0.84,1.00,0.0197,5.13,1023.15,12.09,"My sister and I, while traveling home through ..."
713,1265,Greenup County,Kentucky,38.53187,-82.79890,Class A,2001-01-04,Winter,29,0.81,0.94,0.0002,8.64,1020.63,5.94,well it was really dark outside and i could no...


In [14]:
# Close the connection to the SQLite database; the table has already been read into df
cnx.close()

In [None]:
#Create dummies

In [None]:
#concat dummies to df

In [None]:
#drop the columns from which the dummies were made

### 1.2 Create Target and Features
- Create the target (`y`) from the “Did_the_first_pokemon_win?” column.
- Create the features (`X`) DataFrame from the remaining columns.

In [None]:
# Separate the y variable, the target
# NOTE(review): the preview of the `bigfoot` table shown after loading does not
# list this column or the ones dropped below — confirm df has the expected schema.
y = df['Did_the_first_pokemon_win?']

# Separate the X variable, the features.
# The labels are passed as a single list through `columns=`; handing them to
# DataFrame.drop as separate positional arguments raises a TypeError.
# drop() returns a new DataFrame, so df itself is left unchanged.
columns_to_drop = [
    '#_First',
    '#_Second',
    'First_Name',
    'Second_Name',
    'Battle_number',
    'Did_the_first_pokemon_win?',
]
X = df.drop(columns=columns_to_drop)

In [None]:
# Preview the first rows of the features DataFrame X
X.head()

In [None]:
#Create dummies

In [None]:
# Preview X again; this cell follows the (still empty) "Create dummies" cell
X.head()

### 1.3 Check the balance of the target variable (`y`) by using the `value_counts` function.

In [None]:
# Count the rows for each distinct value of the target y to check class balance
y.value_counts()

### 1.4 Split the data into training and testing datasets by using `train_test_split`.


In [None]:
# Partition the features and target into training and testing subsets.
# random_state=1 is passed to the split function, and stratify=y is passed so
# that the random sample matches the make-up of the total sample.

# TODO(CHRISTIN): check against the value counts that stratify is needed here

split_parts = train_test_split(X, y, stratify=y, random_state=1)
X_train, X_test, y_train, y_test = split_parts

### 1.5 Scale the data

Use the `StandardScaler` to scale the features data. Remember that only the `X_train` and `X_test` DataFrames should be scaled.

In [None]:
# Build the StandardScaler and fit it on the training features only
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

# Apply the fitted scaler to the training features and then the testing features
X_train_scaled, X_test_scaled = (
    X_scaler.transform(features) for features in (X_train, X_test)
)

In [None]:
#Save the scaler to an h5 file for Flask to pick up

## 2. Create a Logistic Regression Model with the Original Data

###  2.1 Fit a logistic regression model by using the training data (`X_train` and `y_train`).

In [None]:
# Build the Logistic Regression model (lbfgs solver, random_state of 1) and fit
# it to the training data in one step; `fit` hands back the fitted estimator.
# NOTE(review): this trains on the unscaled X_train; X_train_scaled is not used
# in any of the cells shown — confirm that is intended.
classifier = LogisticRegression(solver='lbfgs', random_state=1).fit(X_train, y_train)

# Display the fitted model
classifier

In [None]:
# Validate the model with the training data.
# `score` returns the mean accuracy on the data and targets passed in (here the training split).

classifier.score(X_train, y_train)

In [None]:
# Validate the model with the testing data.
# `score` returns the mean accuracy on the given test data and targets.
classifier.score(X_test, y_test)

### 2.2 Save the predictions on the testing data labels by using the testing feature data (`X_test`) and the fitted model.

In [None]:
#code

### 2.3 Evaluate the model’s performance by doing the following:

* Calculate the accuracy score of the model.

* Generate a confusion matrix.

* Print the classification report.

In [None]:
#code

## 3. Save and Reload the Model using Pickle

In [None]:
#Reference
#https://www.geeksforgeeks.org/saving-a-machine-learning-model/

# Save the model with pickle.
# pickle.dumps serializes an object hierarchy to a bytes object; it does not
# accept a file, so the bytes are written to model.pkl explicitly and the
# `with` block closes the file afterwards.
# `classifier` is the fitted LogisticRegression from the modelling section; the
# name `model` used here previously is not defined anywhere in this notebook.
saved_model = pickle.dumps(classifier)
with open('model.pkl', 'wb') as model_file:
    model_file.write(saved_model)

In [None]:
# Load the pickled model back from model.pkl.
# pickle.load reads from an open binary file (pickle.loads expects a bytes
# object, not a file object); the `with` block closes the file handle afterwards.
# NOTE: unpickling can execute arbitrary code, so only load a model.pkl that
# this notebook itself produced.
with open('model.pkl', 'rb') as model_file:
    pickled_model = pickle.load(model_file)

# Use the loaded model to make predictions
pickled_model.predict(X_test)