# GEOJSON Points

    Create Lon Lat points from City, State names
    Imports CSV File with city names
    Uses geoapify to get lon lat

In [2]:
# Dependencies and Setup
import pandas as pd
import requests as req
import time
from geojson import Point, Feature, FeatureCollection, dump

# Import API key
from api_keys import geoapify_key

In [None]:
# Globals
# Note that the master csv file with all of the entries has been 
#  segmented into smaller files. Goal is to use geoapify within
#  their 3000 requests per day limit on free api keys

# The first file to read in - file has blank lon/lat values
raw_uap_data = 'UAP_DATA_4.csv'

# The updated lon/lat file results
updated_uap_data = 'uap_data_output_1-3000.csv'

# The City, State, Coutry group used
grouped_data_requests = 'grouped_data4_output.csv'

# The geojson output
geojson_output = 'uap_data_output.geojson'

In [5]:
# Load the UAP CSV file into a Pandas DataFrame
uap_data_df = pd.read_csv(raw_uap_data)

# Group city, state, country to limit geoapify queries
cities = uap_data_df.groupby(['City','State','Country']).sum().reset_index()

# Save queries into a list
geoapify_queries = []
geoapify_query = 'https://api.geoapify.com/v1/geocode/search?text='
for colname, data in cities.iterrows():
    city = data.iloc[0]
    State = data.iloc[1]
    Country = data.iloc[2]
    geoapify_queries.append(geoapify_query + 
                            city + ',' + 
                            State + ',' + 
                            Country + '&format=json&apiKey=' + geoapify_key)

print(len(cities))
uap_data_df.sample()

5695


Unnamed: 0,Occurred,Lon,Lat,City,State,Country,Shape,Summary,Media,Explanation
20830,4/26/1980,,,Avondale,AZ,USA,Light,"3 bright orange lights in the Avondale, AZ, area.",,


In [6]:
# limit the for loop as a safety measure
loop = 0

# Finding the lat lon for the city locations
# Adding / updating lat lon in the uap dataframe
for geo in geoapify_queries:

    try:
        # req geoapify
        city_json = req.get(geo).json()

        lon = city_json['results'][0]['lon']
        lat = city_json['results'][0]['lat']
        city_name = city_json['results'][0]['city']
        state_abbr = city_json['results'][0]['state_code']

        # add lon lat to dataframe
        uap_data_df.loc[(uap_data_df['City'] == city_name) & (uap_data_df['State'] == state_abbr), 'Lon'] = lon
        uap_data_df.loc[(uap_data_df['City'] == city_name) & (uap_data_df['State'] == state_abbr), 'Lat'] = lat
        
    except:
        print(city_name)
    
    if loop == 3000:
        break
    loop += 1

    time.sleep(.3)

uap_data_df.sample()

Aberdeen
Abingdon
Abington Township
Town of Rochester
Adams
Adams
Ellis Township
Aguilar
Aguilar
Alanson
Albemarle
Raleigh
Raleigh
Alfred
Allen
Gregg Township
Almont
Almont
Altha
Alto
Alvin
Alvord
American Falls
Amsterdam
Anacortes
Lincoln Crest
Annapolis
Anson
Antelope
Antelope
Antigo
Apache Junction
Village of Greenwood Lake
Town of Riverhead
Aragon
Archer City
Arizona City
Arnolds Park
Arpin
Ash Fork
Ash Fork
North Topsail Beach
Asherton
Town of Ashford
Ashland City
Ashton
West Ocean City
West Ocean City
Lake Waccamaw
Lake Waccamaw
Lake Waccamaw
Atascosa
Athelstane
Attalla
Au Train
Lower Providence Township
Aumsville
Austintown
Azwell
Burbank
Babbitt
Bad Axe
Bagley
Bainbridge Island
Bakersville
Baldwin City
Ball Ground
Baltimore
Baltimore Highlands
Coulee City
Banner Elk
Banning
Bardwell
Lake
Lake
Imperial Township
Middlecreek Township
Town of Highland
Bartlesville
Batavia
Bath
Bayville
Beacon Falls
Bear
Bear Valley
Beaumont
Goshen
Becket
Aiken County
Beeville
Belington
Bellevue
Bel

Unnamed: 0,Occurred,Lon,Lat,City,State,Country,Shape,Summary,Media,Explanation
8437,10/28/2022,-88.314754,41.75717,Aurora,IL,USA,Sphere,Bright orange sphere flying low without noise ...,,


In [7]:
# Save the data to a csv file
uap_data_df.to_csv(updated_uap_data, index=None)

In [8]:
# save the cities group
cities.to_csv(grouped_data_requests, index=None)

In [9]:
# may want to check the csv file for completeness before running this
# Load the UAP CSV file into a Pandas DataFrame
uap_data_df = pd.read_csv(updated_uap_data)

# Output the geojson data
feature_collections = {}

count = len(uap_data_df)
features = []
for i in range(count):
    testout = uap_data_df.iloc[i]
    point = Point((testout.loc[('Lon')], testout.loc[('Lat')]))

    features.append(Feature(geometry=point, 
                            properties={
                                "Occurred": testout.loc[('Occurred')],
                                "City": testout.loc[('City')],
                                "Country": testout.loc[('Country')],
                                "Shape": testout.loc[('Shape')],
                                "Summary": testout.loc[('Summary')]
                                }))

feature_collection = FeatureCollection(features)


with open(geojson_output, 'w') as f:
    dump(feature_collection, f)
