In [6]:
import sqlalchemy
import pandas as pd
import pandasql as ps

from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, func, inspect, distinct, and_, inspect

import datetime as dt
import requests
import math
import numpy as np

from sklearn.linear_model import LogisticRegression
import joblib


## Retrieve last 30 days function

In [7]:


def ReadUSGS(BaseDate = dt.datetime.today()):
    
    StartDate = (BaseDate - dt.timedelta(days=30)).strftime('%Y-%m-%d %H:%M:%S')
    EndDate = BaseDate.strftime('%Y-%m-%d %H:%M:%S')
    
    url = f'https://earthquake.usgs.gov/fdsnws/event/1/query.geojson?starttime={StartDate}&endtime={EndDate}&maxlatitude=41.961&minlatitude=32.813&maxlongitude=-114.521&minlongitude=-124.255&minmagnitude=2.5&orderby=time'
    
    data = requests.get(url)
    return data.json()

In [8]:
def AddPOR(Main_df):
    periods = np.array([])
    for index, row in Main_df.iterrows():
        catagory = f"Mag{int(row['category']*10)}"
        city = row['ClosestCity']
        periods = np.append(periods, POR_df.loc[city,catagory])

    Main_df['POR'] = periods

    return Main_df

In [9]:

citygeos = pd.read_csv("city geos.csv")



In [10]:

def FindCity(lat, lon):
    distCompare = 100000000
    for i in range(0,len(citygeos)):
        latCity = citygeos.iloc[i,1]
        lonCity = citygeos.iloc[i,2]
        x = latCity - lat
        y = (lonCity - lon) * math.cos(lat * math.pi/180)
        distance = 110.25 * (x**2 +y**2)**0.5
        if distance < distCompare:
            Geo = citygeos.iloc[i,0]
            distCompare = distance
    return Geo


In [40]:
def df_to_json(df):
    
    features = []
    
    for index, row in df.iterrows():
        
        # Read record from dataframe 
        lon = row['Longitude']
        lat = row['Latitude']
        mag = row['mag']
        city = row['ClosestCity']
        
        # build record into dictionary 
        record = {
            
            'geometry':{
                'coordinates': [lon, lat, depth]
            },
            
            "properties": {
                'mag': mag,
                'title':city
            }
              
        }
                   
        features.append(record)

    return {'features':features}
        
        
    

In [12]:
    def generate_features(df):
        features = POR_df.columns
        cities =df['ClosestCity'].unique()

        my_df = pd.DataFrame(columns = features)
        my_df.insert(0,'ClosestCity',cities)
        my_df = my_df.set_index('ClosestCity')
        my_df.fillna(0, inplace =True)

        agg = df.groupby('ClosestCity')['category'].value_counts()
        for item in agg.index:

            #print(item[0], item[1] , agg[item[0]][item[1]])
            my_df['Mag' + str(int(item[1]*10))][item[0]] = agg[item[0]][item[1]]

        cities_df = citygeos.copy()
        cities_df.rename(columns= {'City':'ClosestCity'}, inplace =True)

        output_df = pd.merge(cities_df, my_df, on = 'ClosestCity')
        
        return output_df
    

In [13]:
def Json_to_df(response):
    
    rows = []
    for record in response['features']:

        mag = record['properties']['mag']
        lon = record['geometry']['coordinates'][0]
        lat = record['geometry']['coordinates'][1]

        if mag - int(mag) > .5:
            n = 0.5 
        else:
            n = 0


        row = {

            'longitude': lon,
            'latitude': lat,
            'depth': record['geometry']['coordinates'][2],
            'mag': mag,
            'category': int(mag) + n,
            'ClosestCity': FindCity(lat=lat,lon=lon)

        }

        rows.append(row)
        
    df=pd.DataFrame(rows)
    
    return df

In [14]:
POR_df = pd.read_csv('POR.csv')
POR_df.set_index('ClosestCity',inplace = True)
POR_df

Unnamed: 0_level_0,Mag20,Mag25,Mag30,Mag35,Mag40,Mag45,Mag50,Mag55,Mag60,Mag65,Mag70,Mag75,Mag80,Mag85,Mag90
ClosestCity,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Anaheim,2500.0,0.549451,2.173913,5.000000,16.666667,50.000000,50.000000,2500.000000,2500.0,2500.0,2500.0,2500.0,2500.0,2500.0,2500.0
Antelope Peak NV,2500.0,2.083333,3.333333,3.571429,16.666667,16.666667,2500.000000,2500.000000,2500.0,2500.0,2500.0,2500.0,2500.0,2500.0,2500.0
Avenal CA,2500.0,0.027579,0.067204,0.196078,0.581395,2.272727,7.142857,16.666667,2500.0,50.0,2500.0,2500.0,2500.0,2500.0,2500.0
Azusa,2500.0,0.649351,3.125000,7.142857,25.000000,2500.000000,2500.000000,50.000000,2500.0,2500.0,2500.0,2500.0,2500.0,2500.0,2500.0
Bakersfield,2500.0,0.085911,0.314465,1.250000,2.941176,7.142857,16.666667,2500.000000,2500.0,2500.0,2500.0,2500.0,2500.0,2500.0,2500.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Walnut Creek,2500.0,0.213675,0.641026,2.500000,5.555556,2500.000000,2500.000000,2500.000000,2500.0,2500.0,2500.0,2500.0,2500.0,2500.0,2500.0
Warm Springs NV,2500.0,0.500000,1.351351,3.846154,10.000000,2500.000000,2500.000000,2500.000000,2500.0,2500.0,2500.0,2500.0,2500.0,2500.0,2500.0
"Willits, CA",2500.0,0.109409,0.304878,1.666667,3.333333,12.500000,2500.000000,2500.000000,2500.0,2500.0,2500.0,2500.0,2500.0,2500.0,2500.0
Yosemite,2500.0,0.139665,0.581395,1.851852,10.000000,50.000000,2500.000000,50.000000,2500.0,2500.0,2500.0,2500.0,2500.0,2500.0,2500.0


In [15]:

def DefineProblem(AtDate):
    
    myDate = dt.datetime.strptime(AtDate, '%Y-%m-%d %H:%M:%S')

    response = ReadUSGS(myDate)

    df = Json_to_df(response)

    # df = AddPOR(df)
    
    output_df = generate_features(df)

    return output_df




In [35]:
# def target(AtDate)
myDate = '2020-10-17 00:00:00'
myDate = dt.datetime.strptime(AtDate, '%Y-%m-%d %H:%M:%S')
response = ReadUSGS(myDate)

df = Json_to_df(response)

    

NameError: name 'AtDate' is not defined

In [36]:
df =DefineProblem('2020-10-17 00:00:00')
df

Unnamed: 0,ClosestCity,Latitude,Longitude,Mag20,Mag25,Mag30,Mag35,Mag40,Mag45,Mag50,Mag55,Mag60,Mag65,Mag70,Mag75,Mag80,Mag85,Mag90
0,Hemet,33.74,-116.99,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Palm Springs,33.8,-116.54,0,2,1,1,0,0,0,0,0,0,0,0,0,0,0
2,Riverside,33.94,-117.39,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
3,Ontario,34.04,-117.61,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0
4,San Bernardino,34.14,-117.3,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0
5,Pasadena,34.16,-118.14,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
6,San Jose,37.3,-121.85,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
7,Fremont,37.52,-122.0,0,4,3,0,0,0,0,0,0,0,0,0,0,0,0
8,Salinas,36.69,-121.63,0,6,1,0,0,0,0,0,0,0,0,0,0,0,0
9,Oakland,37.77,-122.22,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0


In [37]:
X = df.iloc[:,3:]
X

Unnamed: 0,Mag20,Mag25,Mag30,Mag35,Mag40,Mag45,Mag50,Mag55,Mag60,Mag65,Mag70,Mag75,Mag80,Mag85,Mag90
0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,2,1,1,0,0,0,0,0,0,0,0,0,0,0
2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
3,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0
4,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0
5,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
6,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
7,0,4,3,0,0,0,0,0,0,0,0,0,0,0,0
8,0,6,1,0,0,0,0,0,0,0,0,0,0,0,0
9,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0


In [38]:
# make prediction
model = joblib.load('final_model.sav')
predictions = model.predict(X)
df.insert(3, 'mag', predictions)
output_df = df.iloc[:,:4]
output_df
# longitude
# Longitude	

Unnamed: 0,ClosestCity,Latitude,Longitude,mag
0,Hemet,33.74,-116.99,2
1,Palm Springs,33.8,-116.54,2
2,Riverside,33.94,-117.39,2
3,Ontario,34.04,-117.61,2
4,San Bernardino,34.14,-117.3,2
5,Pasadena,34.16,-118.14,2
6,San Jose,37.3,-121.85,2
7,Fremont,37.52,-122.0,2
8,Salinas,36.69,-121.63,2
9,Oakland,37.77,-122.22,2


In [41]:
json = df_to_json(output_df)
json

KeyError: 'depth'