In [21]:
# Import the basic libraries
import pandas as pd
import numpy as np

# Read the UFO sightings CSV into a DataFrame
ufos = pd.read_csv('./data/ufos.csv')

# Preview the first five rows
ufos.head(5)

Unnamed: 0,datetime,city,state,country,shape,duration (seconds),duration (hours/min),comments,date posted,latitude,longitude
0,10/10/1949 20:30,san marcos,tx,us,cylinder,2700.0,45 minutes,This event took place in early fall around 194...,4/27/2004,29.883056,-97.941111
1,10/10/1949 21:00,lackland afb,tx,,light,7200.0,1-2 hrs,1949 Lackland AFB&#44 TX. Lights racing acros...,12/16/2005,29.38421,-98.581082
2,10/10/1955 17:00,chester (uk/england),,gb,circle,20.0,20 seconds,Green/Orange circular disc over Chester&#44 En...,1/21/2008,53.2,-2.916667
3,10/10/1956 21:00,edna,tx,us,circle,20.0,1/2 hour,My older brother and twin sister were leaving ...,1/17/2004,28.978333,-96.645833
4,10/10/1960 20:00,kaneohe,hi,us,light,900.0,15 minutes,AS a Marine 1st Lt. flying an FJ4B fighter/att...,1/22/2004,21.418056,-157.803611


In [22]:
# Keep only the columns needed for modelling, under shorter names
column_names = {
    'duration (seconds)': 'Seconds',
    'country': 'Country',
    'latitude': 'Latitude',
    'longitude': 'Longitude',
}
ufos = ufos[list(column_names)].rename(columns=column_names)

# List the distinct country codes (missing values show up as nan)
ufos['Country'].unique()

array(['us', nan, 'gb', 'ca', 'au', 'de'], dtype=object)

In [23]:
# Remove every row that has a missing value in any column
ufos.dropna(inplace=True)

# Restrict to sightings lasting from 1 to 60 seconds (both ends inclusive)
short_sighting = ufos['Seconds'].between(1, 60)
ufos = ufos[short_sighting]

# Summarise the remaining rows, columns and dtypes
ufos.info()

<class 'pandas.core.frame.DataFrame'>
Index: 25863 entries, 2 to 80330
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Seconds    25863 non-null  float64
 1   Country    25863 non-null  object 
 2   Latitude   25863 non-null  float64
 3   Longitude  25863 non-null  float64
dtypes: float64(3), object(1)
memory usage: 1010.3+ KB


In [24]:
# List the distinct country codes, alphabetically, before they are encoded
country_codes = ufos['Country'].unique()
print(sorted(country_codes))

['au', 'ca', 'de', 'gb', 'us']


In [25]:
# Import LabelEncoder
from sklearn.preprocessing import LabelEncoder

# Learn the set of country codes, then replace each code with its integer label
label_encoder = LabelEncoder()
label_encoder.fit(ufos['Country'])
ufos['Country'] = label_encoder.transform(ufos['Country'])

# Preview the encoded frame
ufos.head(5)


Unnamed: 0,Seconds,Country,Latitude,Longitude
2,20.0,3,53.2,-2.916667
3,20.0,4,28.978333,-96.645833
14,30.0,4,35.823889,-80.253611
23,60.0,4,45.582778,-122.352222
24,3.0,3,51.783333,-0.783333


In [28]:
# Columns the classifier uses as inputs
Selected_features = [
    'Seconds',
    'Latitude',
    'Longitude',
]

# X holds the input features, y the encoded country label to predict
X, y = ufos[Selected_features], ufos['Country']

In [29]:
# Import train_test_split
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
split_parts = train_test_split(X, y, test_size=0.2, random_state=0)
X_train, X_test, y_train, y_test = split_parts


In [34]:
# Import the logistic regression model
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Fit a logistic-regression classifier on the training split
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Predict country labels for the held-out rows
predictions = model.predict(X_test)

# Report per-class metrics, the raw predictions and the overall accuracy
report = classification_report(y_test, predictions)
accuracy = accuracy_score(y_test, predictions)
print(report)
print('Predicted labels: ', predictions)
print('Accuracy: ', accuracy)


              precision    recall  f1-score   support

           0       1.00      1.00      1.00        41
           1       0.85      0.47      0.60       250
           2       1.00      1.00      1.00         8
           3       1.00      1.00      1.00       131
           4       0.97      1.00      0.98      4743

    accuracy                           0.97      5173
   macro avg       0.96      0.89      0.92      5173
weighted avg       0.97      0.97      0.97      5173

Predicted labels:  [4 4 4 ... 3 4 4]
Accuracy:  0.9702300405953992


In [35]:
# Import pickle to save the model
import pickle

# Define the filename for the model
model_filename = 'ufo-model.pkl'

# Save the model. The context manager closes (and therefore flushes) the file
# even if pickling raises, instead of leaving an open handle for the garbage
# collector to clean up.
with open(model_filename, 'wb') as model_file:
    pickle.dump(model, model_file)

In [36]:
# Load the model back from the file written by the save step. The context
# manager closes the file handle as soon as unpickling finishes.
# NOTE: pickle.load can execute arbitrary code, so only unpickle files you
# created yourself or otherwise trust.
with open(model_filename, 'rb') as model_file:
    model = pickle.load(model_file)

# Make a test prediction. Example: 50 seconds, latitude 44, longitude -12.
# The model was fitted on a DataFrame, so the sample is passed with the same
# column names; a bare nested list makes scikit-learn warn that
# "X does not have valid feature names".
sample = pd.DataFrame([[50, 44, -12]], columns=Selected_features)
print(model.predict(sample))

[3]




In [None]:
# Pros and Cons of having a `/train` route in a Flask app

## ✅ Pros
- Easy to retrain the model by just visiting a URL.
- Useful for development and testing.
- Retraining refreshes the in-memory model directly, with no need to re-export and reload a `.pkl` file.
- Keeps the model up-to-date with new data.

## ⚠️ Cons
- Not secure: anyone can access `/train` and retrain the model.
- Can slow down or crash the app if used too often.
- Overwrites the previous model on every call, so there is no versioning and no way to roll back.
- Not scalable for production environments.

## 💡 Conclusion
Great for learning and local testing, but not safe or practical for real-world production apps.
