# Logistic Regression for Predicting Feeling Earthquakes

### Predicts how likely an earthquake is to be felt at a location, and roughly what magnitude to expect

- US earthquake data is from http://earthquake.usgs.gov and includes 77,161 data points for earthquakes from 1990 to 2018.
- US zipcodes data is from https://www.census.gov/geo/maps-data/data/gazetteer2018.html

In [1]:
# Required Python Packages
import pandas as pd
import csv
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn import metrics

# Creates a new earthquake file that assigns each event a "felt" value of 0-4 based on its magnitude.
- NOT-LIKELY feel an earthquake (mag: 0 - 2.4)
- MAYBE feel an earthquake (mag: 2.5 - 3.9) 
- LIKELY feel an earthquake (mag: 4 - 4.9) 
- LIKELY feel an earthquake (mag: 5 - 5.9) and MIGHT see some MINOR-SERIOUS damage from it
- LIKELY feel an earthquake (mag: 6 - 8) and LIKELY see some SERIOUS damage from it

In [2]:
def _felt_class(magnitude):
    """Map an earthquake magnitude to its "felt" class (0-4).

    The classes follow the ranges documented in this notebook:
        0 -> below 2.5
        1 -> 2.5 up to 3.9
        2 -> up to 4.9
        3 -> up to 5.9
        4 -> above 5.9

    Returns None when the magnitude compares false against every bound
    (a NaN value), so the caller can leave that row out.
    """
    # Strictly below 2.5: a magnitude of exactly 2.5 belongs to class 1.
    if magnitude < 2.5:
        return 0
    if magnitude <= 3.9:
        return 1
    if magnitude <= 4.9:
        return 2
    if magnitude <= 5.9:
        return 3
    if magnitude > 5.9:
        return 4
    return None


# Copy the earthquake file row by row, appending a 'felt' column derived from
# the magnitude stored in column index 4.
# newline='' is what the csv module asks for on files it reads or writes.
with open('USearthquakes1990to2018.csv', 'r', newline='') as csvinput:
    with open('USearthquakes_felt.csv', 'w', newline='') as csvoutput:
        writer = csv.writer(csvoutput, lineterminator='\n')
        reader = csv.reader(csvinput)

        # The first row is the header; extend it with the new column name.
        header = next(reader)
        header.append('felt')
        writer.writerow(header)

        # Rows are written as they are read, so the whole file never has to
        # be held in memory (and the builtin `all` is no longer shadowed).
        for row in reader:
            felt = _felt_class(float(row[4]))
            if felt is None:
                continue
            row.append(felt)
            writer.writerow(row)

# Prints information about the US earthquakes dataset
- Number of rows (each is an earthquake event recorded)
- The title of each column, or each feature
- The first few rows of the earthquake data

In [3]:
# File to be used for the logistic regression model
EARTHQUAKES_FILE = "USearthquakes_felt.csv"

# Load the data set once; the same frame is reused for every printout below
# (the file used to be read from disk twice in a row).
dataset = pd.read_csv(EARTHQUAKES_FILE)
print("Number of Observations =", len(dataset), "\n")

# Column names of the data set
headers = dataset.columns.tolist()
print("Data set headers = {headers}".format(headers=headers), "\n")

# Gets the first few rows of the US earthquake dataset
print("First few rows =\n", dataset.head())

Number of Observations = 77161 

Data set headers = ['datetime', 'latitude', 'longitude', 'depth', 'magnitude', 'felt'] 

First few rows =
                    datetime   latitude   longitude  depth  magnitude  felt
0  2018-12-18T04:21:40.550Z  40.934334 -124.629837  14.82       3.03     1
1  2018-12-17T18:02:39.570Z  36.094000 -117.882000   4.09       2.62     1
2  2018-12-17T11:26:24.210Z  38.057667 -118.875168  10.70       2.69     1
3  2018-12-17T07:42:48.530Z  36.462900  -98.773700   7.92       3.00     1
4  2018-12-17T02:52:01.740Z  35.956833 -116.734500  -0.34       3.62     1


# Training the Logistic Regression Model
- Testing for accuracy
- Splitting the dataset
- Prints the sizes of train_X train_Y test_X test_Y

In [4]:
def train_logistic_regression(train_x, train_y):
    """
    Builds a LogisticRegression with default settings, fits it on the
    training features (train_x) and training target (train_y), and returns
    the fitted model.
    """
    # fit() hands back the estimator itself, so the fitted model can be
    # returned straight from the call.
    return LogisticRegression().fit(train_x, train_y)

In [5]:
def model_accuracy(trained_model, features, targets):
    """
    Returns whatever trained_model.score(features, targets) reports for the
    given features and their expected targets.
    """
    return trained_model.score(features, targets)

In [6]:
# Re-read the labelled earthquake file that feeds the classifier
dataset = pd.read_csv(EARTHQUAKES_FILE)

# Only the location columns are used as inputs; 'felt' is the class to predict
training_features = ['latitude', 'longitude']
# TODO: need to add calculations on if the earthquakes are felt or not, depending on the zipcode (put it before this line)
target = 'felt'

# 70% of the rows are used for training, the remainder for testing
train_x, test_x, train_y, test_y = train_test_split(
    dataset[training_features],
    dataset[target],
    train_size=0.7,
)
trained_logistic_regression_model = train_logistic_regression(train_x, train_y)

# Report the shape of each piece of the split, in the same order as before
for caption, piece in (
    ("train_x size = ", train_x),
    ("train_y size = ", train_y),
    ("test_x size = ", test_x),
    ("test_y size = ", test_y),
):
    print(caption, piece.shape)



train_x size =  (54012, 2)
train_y size =  (54012,)
test_x size =  (23149, 2)
test_y size =  (23149,)


# Finds the zipcode in the zipcode file and gets the lat and lon associated with the zipcode

In [7]:
# zipcode file that has lat and lon values associated with the zipcode.
# The rows are read into a list while the file is still open: a bare
# csv.reader is lazy and can no longer be iterated once the with-block has
# closed the underlying file.
with open('USzipcodes.csv', 'r', newline='') as csvfile:
    lines = list(csv.reader(csvfile))
    
# Checks whether the zipcode that was entered has an appropriate length
def lengthCheck(x):
    """
    Returns 1 when str(x) is between 3 and 5 characters long (inclusive),
    otherwise returns 0.
    """
    return 1 if 3 <= len(str(x)) <= 5 else 0

#finds lat and lon of the zipcode in the file
def findMatch(inpt):
    """
    Validates the length of the zipcode and looks it up.

    Returns a (found, lat, lon) tuple. When the input fails lengthCheck,
    "Invalid Input" is printed and (0, 0, 0) is returned, so callers can
    always unpack three values and test `found`. Otherwise the result of
    zipChecker(inpt) is returned unchanged.
    """
    if lengthCheck(inpt) == 0:
        print("Invalid Input")
        # Previously this branch tried to unpack the int 0 into three names
        # (a TypeError) and then returned None.
        return 0, 0, 0
    return zipChecker(inpt)
    
# Returns latitude and longitude of the zip code
def zipChecker(inpt):
    """
    Searches 'USzipcodes.csv' for a row whose first column equals the zipcode.

    inpt -- the zipcode to look for; it is compared as text, so a non-string
            value is converted with str() first.

    Returns (1, lat, lon) for the first matching row, where lat and lon are
    the second and third columns exactly as stored in the file (strings).
    Returns (0, 0, 0) when no row matches, including when the file is empty.
    """
    wanted = str(inpt)
    with open('USzipcodes.csv', 'r', newline='') as csvfile:
        for col in csv.reader(csvfile):
            # Blank or short rows cannot hold a zipcode with coordinates;
            # skip them instead of failing on a missing column.
            if len(col) >= 3 and col[0] == wanted:
                return 1, col[1], col[2]
    return 0, 0, 0

# Prompts user to input a zipcode 
### To check if they will feel an earthquake at the inputted zipcode
Their zipcode is translated into latitude and longitude values

In [8]:
# Ask for a zipcode and translate it into coordinates via findMatch
zipcode = input("Enter 5-digit zip code: ")
found, lat, lon = findMatch(zipcode)
if found != 0:
    print("Latitude and Longitude at zipcode (", zipcode,"):", lat, lon)
else:
    print("Zipcode not found:", zipcode)

Enter 5-digit zip code: 92119
Latitude and Longitude at zipcode ( 92119 ): 32.817888 -117.031956


# Logistic Regression Results:

Trains and tests the model, then outputs the likelihood of feeling an earthquake at the given zipcode.

In [9]:
# 'magnitude' is deliberately left out of the features, because 'felt' is
# derived from it; only 'latitude' and 'longitude' are used for training
training_features = ['latitude','longitude']
target = 'felt'

# Split the data 70/30 into train and test parts
train_x, test_x, train_y, test_y = train_test_split(
    dataset[training_features],
    dataset[target],
    train_size=0.7,
)

# Fit the classifier on the training part
trained_logistic_regression_model = train_logistic_regression(train_x, train_y)

# Score on the data the model was fitted on
train_accuracy = model_accuracy(trained_logistic_regression_model, train_x, train_y)
print("Train Accuracy = ", train_accuracy)

# Score on the held-out part
test_accuracy = model_accuracy(trained_logistic_regression_model, test_x, test_y)
print("Test Accuracy = ", test_accuracy,"\n")

# Predict the 'felt' class for the coordinates of the entered zipcode
X = [[float(lat),float(lon)]]
print("Predicting if will feel an earthquake for the lat and lon values:",X,"\n")
feltval = trained_logistic_regression_model.predict(X)

# One message per 'felt' class; the position in the tuple is the class value
felt_messages = (
    "You will NOT-LIKELY feel an earthquake (mag: 0 - 2.4) at the zipcode:",
    "You will MAYBE feel an earthquake (mag: 2.5 - 3.9) at the zipcode:",
    "You will LIKELY feel an earthquake (mag: 4 - 4.9) at the zipcode:",
    "You will LIKELY feel an earthquake (mag: 5 - 5.9) and MIGHT see some MINOR-SERIOUS damage from it, at the zipcode:",
    "You will LIKELY feel an earthquake (mag: 6 - 8) and LIKELY see some SERIOUS damage from it, at the zipcode:",
)
for felt_class, message in enumerate(felt_messages):
    if feltval == felt_class:
        print(message,zipcode)
        break



Train Accuracy =  0.8859882988965415
Test Accuracy =  0.8859993952222558 

Predicting if will feel an earthquake for the lat and lon values: [[32.817888, -117.031956]] 

You will MAYBE feel an earthquake (mag: 2.5 - 3.9) at the zipcode: 92119
