## Build a Web App using a Regression model to learn about UFO sighting

In [21]:
import pandas as pd
import numpy as np

url = 'https://raw.githubusercontent.com/sofzcc/lab-model-deployment/main/web-app/data/ufos.csv'
ufos = pd.read_csv(url)
ufos.head()


Unnamed: 0,datetime,city,state,country,shape,duration (seconds),duration (hours/min),comments,date posted,latitude,longitude
0,10/10/1949 20:30,san marcos,tx,us,cylinder,2700.0,45 minutes,This event took place in early fall around 194...,4/27/2004,29.883056,-97.941111
1,10/10/1949 21:00,lackland afb,tx,,light,7200.0,1-2 hrs,1949 Lackland AFB&#44 TX. Lights racing acros...,12/16/2005,29.38421,-98.581082
2,10/10/1955 17:00,chester (uk/england),,gb,circle,20.0,20 seconds,Green/Orange circular disc over Chester&#44 En...,1/21/2008,53.2,-2.916667
3,10/10/1956 21:00,edna,tx,us,circle,20.0,1/2 hour,My older brother and twin sister were leaving ...,1/17/2004,28.978333,-96.645833
4,10/10/1960 20:00,kaneohe,hi,us,light,900.0,15 minutes,AS a Marine 1st Lt. flying an FJ4B fighter/att...,1/22/2004,21.418056,-157.803611


In [22]:

ufos = pd.DataFrame({'Duration': ufos['duration (seconds)'], 'City': ufos['city'],'Latitude': ufos['latitude'],'Longitude': ufos['longitude'], 'Shape': ufos['shape']})

# Drop rows with missing values
ufos.Shape.unique()
ufos.head()

# 0 au, 1 ca, 2 de, 3 gb, 4 us

Unnamed: 0,Duration,City,Latitude,Longitude,Shape
0,2700.0,san marcos,29.883056,-97.941111,cylinder
1,7200.0,lackland afb,29.38421,-98.581082,light
2,20.0,chester (uk/england),53.2,-2.916667,circle
3,20.0,edna,28.978333,-96.645833,circle
4,900.0,kaneohe,21.418056,-157.803611,light


In [23]:
print(ufos['Shape'].unique())


['cylinder' 'light' 'circle' 'sphere' 'disk' 'fireball' 'unknown' 'oval'
 'other' 'cigar' 'rectangle' 'chevron' 'triangle' 'formation' nan 'delta'
 'changing' 'egg' 'diamond' 'flash' 'teardrop' 'cone' 'cross' 'pyramid'
 'round' 'crescent' 'flare' 'hexagon' 'dome' 'changed']


In [24]:
ufos.dropna(inplace=True)

ufos = ufos[(ufos['Duration'] >= 1) & (ufos['Duration'] <= 60)]

ufos.info()



<class 'pandas.core.frame.DataFrame'>
Index: 28563 entries, 2 to 80330
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Duration   28563 non-null  float64
 1   City       28563 non-null  object 
 2   Latitude   28563 non-null  float64
 3   Longitude  28563 non-null  float64
 4   Shape      28563 non-null  object 
dtypes: float64(3), object(2)
memory usage: 1.3+ MB


In [29]:
from sklearn.preprocessing import LabelEncoder

ufos['Shape'] = LabelEncoder().fit_transform(ufos['Shape'])

ufos.head()



Unnamed: 0,Duration,City,Latitude,Longitude,Shape
2,20.0,1641,53.2,-2.916667,3
3,20.0,2579,28.978333,-96.645833,3
14,30.0,4924,35.823889,-80.253611,19
18,20.0,751,32.364167,-64.678611,17
23,60.0,9503,45.582778,-122.352222,19


In [31]:
from sklearn.model_selection import train_test_split
Selected_features = ['Duration','Latitude','Longitude']

X = ufos[Selected_features]
y = ufos['Shape']

#Encode the City column
label_encoder = LabelEncoder()
ufos['Shape'] = label_encoder.fit_transform(ufos['Shape'])


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)



In [41]:
# Complete the model training here
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, classification_report
from sklearn.linear_model import LogisticRegression


model = LogisticRegression(penalty='l2', C=1.0, max_iter=1000)  #max_iter = 1000, since both 100 and 500 iterations returned "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT. Logistic regression"


model.fit(X_train, y_train)

predictions = model.predict(X_test)
print(classification_report(y_test, predictions))
print('Predicted labels: ', predictions)
print('Accuracy: ', accuracy_score(y_test, predictions))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00        79
           1       0.00      0.00      0.00        99
           2       0.00      0.00      0.00       156
           3       0.00      0.00      0.00       520
           4       0.00      0.00      0.00        18
           5       0.00      0.00      0.00         1
           6       0.00      0.00      0.00        18
           7       0.00      0.00      0.00        80
           8       0.00      0.00      0.00         2
           9       0.00      0.00      0.00        70
          10       0.00      0.00      0.00       306
          12       0.00      0.00      0.00        51
          13       0.00      0.00      0.00       567
          15       0.00      0.00      0.00       147
          16       0.00      0.00      0.00       161
          17       0.21      1.00      0.35      1212
          18       0.00      0.00      0.00       416
          19       0.00    

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [42]:
import pickle


model_filename = 'ufo-model.pkl'
pickle.dump(model, open(model_filename,'wb'))

model = pickle.load(open('ufo-model.pkl','rb'))
print(model.predict([[50,60,-12]]))




[17]




In [43]:
pwd

'/content'