# GEOJSON Points

    Create Lon Lat points from City, State names
    Imports CSV File with city names
    Uses geoapify to get lon lat

In [3]:
# Dependencies and Setup
import time
from urllib.parse import quote

import pandas as pd
import requests as req
from geojson import Point, Feature, FeatureCollection, dump

# Import API key
from api_keys import geoapify_key

In [16]:
# Globals
# The master CSV with every entry has been split into smaller files so
#  that each run stays inside geoapify's 3000 requests per day limit
#  on free api keys.

# All inputs and outputs of this notebook live under this folder
_working_dir = 'working'

# First file to read in - its lon/lat values start out blank
raw_uap_data = _working_dir + '/uap_data_output_final.csv'

# Where the updated lon/lat results are written. This is the same path
#  as the input, so every save overwrites the file the next run reads.
updated_uap_data = raw_uap_data

# The City, State, Country groups that were sent to geoapify
grouped_data_requests = _working_dir + '/grouped_data_output.csv'

# The geojson output
geojson_output = _working_dir + '/uap_data_output.geojson'

In [17]:
# Load the UAP CSV file into a Pandas DataFrame
uap_data_df = pd.read_csv(raw_uap_data)

# Group city, state, country to limit geoapify queries: one request per
# distinct place instead of one per row.
# NOTE(review): .sum() aggregates every non-key column as well; only the
# three key columns are used to build the queries below.
cities = uap_data_df.groupby(['City','State','Country']).sum().reset_index()

# Save queries into a list, in the same order as the rows of `cities`.
# The free-text part is percent-encoded so that spaces, '&', '#', '/' or
# non-ASCII characters in a place name cannot break or truncate the URL.
geoapify_query = 'https://api.geoapify.com/v1/geocode/search?text='
geoapify_queries = [
    geoapify_query
    + quote(f'{city},{state},{country}', safe='')
    + '&format=json&apiKey=' + geoapify_key
    for city, state, country in zip(cities['City'], cities['State'], cities['Country'])
]

print(len(cities))
uap_data_df.sample()

11224


Unnamed: 0,Occurred,Lon,Lat,City,State,Legal_Cannabis,Country,Shape,Summary,Media,Explanation
13500,8/3/1900,-87.9,42.95,Milwaukee,WI,1/1/2050,USA,Starbucks,"5300 South Howell Ave, Third Ward",,


In [14]:
# limit the for loop as a safety measure
# Only queries whose position is inside [loop_start_at, loop_end_at]
# (both inclusive) are sent to geoapify.
loop_start_at = 3001
loop_end_at = 6779

# Finding the lat lon for the city locations
# Adding / updating lat lon in the uap dataframe
for loop_counter, geo in enumerate(geoapify_queries):
    if loop_counter < loop_start_at:
        continue
    if loop_counter > loop_end_at:
        break

    # geoapify_queries was built row by row from `cities`, so the same
    # position identifies the place that was actually asked for.
    queried = cities.iloc[loop_counter]
    place = f"{queried['City']}, {queried['State']}, {queried['Country']}"

    try:
        # req geoapify; the timeout keeps one stalled request from hanging the run
        response = req.get(geo, timeout=30)
        response.raise_for_status()
        best_match = response.json()['results'][0]
        lon = best_match['lon']
        lat = best_match['lat']
    except (req.RequestException, ValueError, LookupError) as err:
        # Report the place that failed. Only the exception type is shown:
        # requests' error messages embed the URL, which contains the API key.
        print(f'{place}: {type(err).__name__}')
        continue

    print(f'itteration {loop_counter}')

    # add lon lat to dataframe
    # Match on the queried key rather than on the name the geocoder
    # returns, which may be spelled differently or be another place.
    place_rows = (
        (uap_data_df['City'] == queried['City'])
        & (uap_data_df['State'] == queried['State'])
        & (uap_data_df['Country'] == queried['Country'])
    )
    uap_data_df.loc[place_rows, 'Lon'] = lon
    uap_data_df.loc[place_rows, 'Lat'] = lat

    # time.sleep(.5)

uap_data_df.sample()

itteration 201
itteration 202
Rochester
itteration 203
itteration 204
Rochester
itteration 205
itteration 206
itteration 207
itteration 208
itteration 209
itteration 210
itteration 211
itteration 212
itteration 213
itteration 214
itteration 215
itteration 216
itteration 217
itteration 218
itteration 219
itteration 220
itteration 221
itteration 222
itteration 223
itteration 224
itteration 225
itteration 226
itteration 227
itteration 228
itteration 229
itteration 230
itteration 231
itteration 232
itteration 233
itteration 234
itteration 235
Rocky River
itteration 236
itteration 237
itteration 238
itteration 239
itteration 240
itteration 241
itteration 242
itteration 243
itteration 244
itteration 245
itteration 246
itteration 247
itteration 248
itteration 249
itteration 250
New York
itteration 251
itteration 252
Rockdale
itteration 253
itteration 254
itteration 255
itteration 256
itteration 257
itteration 258
itteration 259
itteration 260
itteration 261
itteration 262
itteration 263
itter

Unnamed: 0,Occurred,Lon,Lat,City,State,Country,Shape,Summary,Media,Explanation
8241,10/30/2004,-122.283572,47.513069,Seattle,WA,USA,Formation,Flying v-formation of glowing orb-like craft.,,


In [18]:
# Write the dataframe back to the results csv, leaving out the row index
uap_data_df.to_csv(updated_uap_data, index=False)

In [19]:
# Write the grouped cities frame to its own csv, leaving out the row index
cities.to_csv(grouped_data_requests, index=False)

In [20]:
# may want to check the csv file for completeness before running this
# Load the UAP CSV file into a Pandas DataFrame
uap_data_df = pd.read_csv(updated_uap_data)

# Columns copied into each feature's "properties"
property_columns = ['Occurred', 'City', 'State', 'Country',
                    'Shape', 'Summary', 'Legal_Cannabis']

# Output the geojson data
features = []
unlocated = 0
for row in uap_data_df.to_dict('records'):
    lon, lat = row['Lon'], row['Lat']

    # Blank csv cells are read back as NaN, and NaN is not valid JSON.
    # A row without coordinates is kept as a feature with null geometry
    # instead of a point made of NaNs.
    if pd.isna(lon) or pd.isna(lat):
        point = None
        unlocated += 1
    else:
        point = Point((float(lon), float(lat)))

    # Same reason: missing property values are written as JSON null.
    properties = {
        column: (None if pd.isna(row[column]) else row[column])
        for column in property_columns
    }
    features.append(Feature(geometry=point, properties=properties))

if unlocated:
    print(f'{unlocated} rows have no Lon/Lat and were written with null geometry')

feature_collection = FeatureCollection(features)


with open(geojson_output, 'w', encoding='utf-8') as f:
    dump(feature_collection, f)
