# Using APIs and Intro to Machine Learning
#### Connor Leggett

## Using an API

In [1]:
# Importing necessary libraries
import urllib
import requests
import time

### Finding Song Lyrics

In [2]:
# Base URL:
base_url = "https://api.lyrics.ovh/v1/"

# User's input
song = input("Song: ")
artist = input("Artist: ")

# Create a new url based on users requests:
data = requests.get(base_url + artist + "/" + song).json()
print(data['lyrics'])

# Making sure the API returned a value

Song: a
Artist: halsey


KeyError: 'lyrics'

## Making an API

In [4]:
# Necessary imports
import random
from flask import Flask, request
import random

### Random Number API

In [6]:
# Initialize flask app
app = Flask(__name__)

# On the main page, return a random number (as a string)
@app.route("/", methods=['GET'])
def first_api():
    x = random.randrange(1,10)
    return str(x)

#Run the app
app.run()


OSError: [Errno 48] Address already in use

### Addition API

In [None]:
# Initialize flask app
app = Flask(__name__)

# On the main page, return a random number (as a string)
@app.route("/first", methods=['GET'])
def first_api():
    x = random.randrange(1,10)
    return str(x)


# On the add page, take two arguments, add them, and return the string
@app.route("/add", methods=['GET'])
def add():
    x = request.args.get("first")
    y = request.args.get("second")
    return str(int(x) + int(y))

#Run the app
app.run()

# http://127.0.0.1:5000/add?first=5&second=6


## Data Science

In [None]:
# Necessary imports:
import json
import pandas as pd
from pandas.plotting import parallel_coordinates
from sklearn.cluster import KMeans
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import cross_val_score
import matplotlib.pyplot as plt
from sklearn.cross_validation import train_test_split
import numpy as np
import random
import os

# Setting up current directory:
CURRENT_DIRECTORY = os.getcwd()

### K Nearest Neighbors

In [None]:
# Importing data from a csv file
dataset = pd.read_csv(CURRENT_DIRECTORY + "/iris_data.csv")

# Adding an Id tag to the dataframe
dataset["Id"] = range(150)

# Check how many of each species we have

# splitting up the labels and the values for each species:
feature_columns = ['SepalLength', 'SepalWidth', 'PetalLength','PetalWidth']
X = dataset[feature_columns].values
Y = dataset['Species'].values


# Encoding Labels (Turning string species names into integers)
# setosa -> 0
# versicolor -> 1
# virginica -> 2
le = LabelEncoder()
Y = le.fit_transform(Y)
Y

In [None]:
dataset

### Visualize the data

In [None]:
# Data Visualization:
plt.figure(figsize=(15,10))
parallel_coordinates(dataset.drop("Id", axis=1), "Species")
plt.title('Parallel Coordinates Plot', fontsize=20, fontweight='bold')
plt.xlabel('Features', fontsize=15)
plt.ylabel('Features values', fontsize=15)
plt.legend(loc=1, prop={'size': 15}, frameon=True,shadow=True, facecolor="white", edgecolor="black")
plt.show()

### Train the model

In [None]:
# Splitting into training and test datasets
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.3, random_state = 0)

# Creating the learning model
knn_classifier = KNeighborsClassifier(n_neighbors=9)

# Fitting the model with the training data
knn_classifier.fit(X_train, Y_train)

### Test the model

In [None]:
# Making predictions with the test data (This line is also where we would potentially classify new data)
Y_pred = knn_classifier.predict(X_test)
print(Y_pred)
print(Y_test)

# Finding Accuracy:
accuracy = accuracy_score(Y_test, Y_pred)*100
print('Accuracy of model: ' + str(round(accuracy, 2)) + ' %.')

### Try out different k values

In [None]:
# creating list of cv scores
cv_scores = []
k_list = [i for i in range(1, 50, 2)]

# perform 10-fold cross validation
for k in range(1,50,2):
    knn = KNeighborsClassifier(n_neighbors=k)
    scores = cross_val_score(knn, X_train, Y_train, cv=10, scoring='accuracy')
    cv_scores.append(scores.mean())



# Displaying results visually
plt.figure()
plt.figure(figsize=(15,10))
plt.title('The optimal number of neighbors', fontsize=20, fontweight='bold')
plt.xlabel('Number of Neighbors K', fontsize=15)
plt.ylabel('Accuracy', fontsize=15)
plt.plot(k_list, cv_scores)

plt.show()

## Making an API for our model

In [None]:
# Set up Flask App
app = Flask(__name__)

@app.route("/", methods = ['GET'])
def classify():
    # array mapping numbers to flower names
    classes = [ "Iris Setosa", "Iris Versicolor", "Iris Virginica"]
    
    # get values for each component, return error message if not a float
    try:
        values = [[float(request.args.get(component)) for component in ["sHeight", "sWidth", "pHeight", "pWidth"]]]
    except TypeError:
        return "An error occured\nUsage: 127.0.0.1:5000?sHeight=NUM&sWidth=NUM&pHeight=NUM&sHeight=NUM"
    
    # Otherwise, return the prediction.
    prediction = knn_classifier.predict(values)[0]
    return classes[prediction]

# Run the app.
app.run()

# try 127.0.0.1:5000?sHeight=5.2&sWidth=4.6&pHeight=4.2&pWidth=7.8
    