# GEOJSON Points

    Create Lon Lat points from City, State names
    Imports CSV File with city names
    Uses geoapify to get lon lat

In [1]:
# Dependencies and Setup
import pandas as pd
import requests as req
import time
from geojson import Point, Feature, FeatureCollection, dump

# Import API key
from api_keys import geoapify_key

In [4]:
# Globals
# Note that the master csv file with all of the entries has been 
#  segmented into smaller files. Goal is to use geoapify within
#  their 3000 requests per day limit on free api keys

# The first file to read in - file has blank lon/lat values
raw_uap_data = 'working/1/UAP_DATA_1.csv'

# The updated lon/lat file results
updated_uap_data = 'working/1/uap_data_output_1a.csv'

# The City, State, Coutry group used
grouped_data_requests = 'working/1/grouped_data_output_1a.csv'

# The geojson output
geojson_output = 'working/1/uap_data_output_1a.geojson'

In [6]:
# Load the UAP CSV file into a Pandas DataFrame
uap_data_df = pd.read_csv(raw_uap_data)

# Group city, state, country to limit geoapify queries
cities = uap_data_df.groupby(['City','State','Country']).sum().reset_index()

# Save queries into a list
geoapify_queries = []
geoapify_query = 'https://api.geoapify.com/v1/geocode/search?text='
for colname, data in cities.iterrows():
    city = data.iloc[0]
    State = data.iloc[1]
    Country = data.iloc[2]
    geoapify_queries.append(geoapify_query + 
                            city + ',' + 
                            State + ',' + 
                            Country + '&format=json&apiKey=' + geoapify_key)

print(len(cities))
uap_data_df.sample()

6779


Unnamed: 0,Occurred,Lon,Lat,City,State,Country,Shape,Summary,Media,Explanation
20636,10/8/2008,,,Ukiah,CA,USA,Light,"""Bright-Star or Something Else""! ((NUFORC Note...",,


In [7]:
# limit the for loop as a safety measure
loop_counter = 0
loop_start_at = 0
loop_end_at = 6780

# Finding the lat lon for the city locations
# Adding / updating lat lon in the uap dataframe
for geo in geoapify_queries:

    try:
        if loop_start_at <= loop_counter <= loop_end_at:  
            # req geoapify
            city_json = req.get(geo).json()

            lon = city_json['results'][0]['lon']
            lat = city_json['results'][0]['lat']
            city_name = city_json['results'][0]['city']
            state_abbr = city_json['results'][0]['state_code']

            # add lon lat to dataframe
            uap_data_df.loc[(uap_data_df['City'] == city_name) & (uap_data_df['State'] == state_abbr), 'Lon'] = lon
            uap_data_df.loc[(uap_data_df['City'] == city_name) & (uap_data_df['State'] == state_abbr), 'Lat'] = lat
            
    except:
        print(city_name)
    
    if loop_counter == loop_end_at:
        break
    loop_counter += 1

    time.sleep(.5)

uap_data_df.sample()

Ridgeville
Ridgway
Riley
Lebanon Township
Rimrock
Ringold
Rio
Río Piedras
Río Piedras
Rio Rancho
River Falls
Prescott
River Pines
River Pines
Norco
Riverwoods
Riviera Beach
Roachdale
Roaring River
Robbinsville Township
Roberts
Wood Township
Robstown
Rocheport
Rochester
Rochester
Rocky River
New York
Rockdale
Rock Hill
Rockingham
Rockingham
Rockland
Rockledge
Roseville
Rockvale
Rockwood Strip T1 R1 NBKP


In [14]:
# Save the data to a csv file
uap_data_df.to_csv(updated_uap_data, index=None)

In [15]:
# save the cities group
cities.to_csv(grouped_data_requests, index=None)

In [19]:
# may want to check the csv file for completeness before running this
# Load the UAP CSV file into a Pandas DataFrame
uap_data_df = pd.read_csv(updated_uap_data)

# Output the geojson data
feature_collections = {}

count = len(uap_data_df)
features = []
for i in range(count):
    testout = uap_data_df.iloc[i]
    point = Point((testout.loc[('Lon')], testout.loc[('Lat')]))

    features.append(Feature(geometry=point, 
                            properties={
                                "Occurred": testout.loc[('Occurred')],
                                "City": testout.loc[('City')],
                                "State": testout.loc[('State')],
                                "Country": testout.loc[('Country')],
                                "Shape": testout.loc[('Shape')],
                                "Summary": testout.loc[('Summary')]
                                }))

feature_collection = FeatureCollection(features)


with open(geojson_output, 'w') as f:
    dump(feature_collection, f)
