In [None]:
# Dependencies
import requests
import json
import pandas as pd
import numpy as np
import re
import io
from config import geoAPIKey
from collections import OrderedDict
from pandas.io.json import json_normalize  
import time
import json
from opencage.geocoder import OpenCageGeocode

import matplotlib.pyplot as plt
import cartopy

import sys
from itertools import combinations
from datetime import datetime
import random
import warnings

## Suppress every warning for the rest of the session
warnings.filterwarnings(action="ignore")

## Report the interpreter and Cartopy versions in use
print(f"Python Version :  {sys.version}")
print(f"Cartopy Version :  {cartopy.__version__}")

%matplotlib inline

## General Resources
## https://amaral.northwestern.edu/blog/getting-long-lat-list-cities
## https://opencagedata.com/tutorials/geocode-in-python
## https://stackoverflow.com/questions/50001347/install-cartopy-dependencies-on-windows
## https://trac.osgeo.org/osgeo4w/

## Cartopy is installed in a conda virtual environment called pythonproject.
## When opening Jupyter Notebook from the Anaconda Prompt, first run: conda activate pythonproject
## Then run: jupyter notebook
## Once in the notebook, go to Kernel > Change kernel and select the "Python Project" ipykernel.

## To register a new virtual environment as a kernel (e.g. Python Project 2),
## run in the Anaconda Prompt: ipython kernel install --user --name=pythonproject2 --display-name="Python Project 2"

## Resources for virtual environment in jupyter notebooks
## https://stackoverflow.com/questions/39604271/conda-environments-not-showing-up-in-jupyter-notebook
## https://www.geeksforgeeks.org/using-jupyter-notebook-in-virtual-environment/


In [None]:
## Load the affiliations CSV into a pandas DataFrame

file_path = "affiliations.csv"

affiliation_df = pd.read_csv(filepath_or_buffer=file_path, encoding="utf-8")

## Preview the first rows
affiliation_df.head(n=5)

In [None]:
## Examine the column names to see which columns the loaded CSV provides
affiliation_df.columns

In [None]:
## Build the list of affiliation strings to geocode.
## Missing values are dropped because there is nothing to geocode, and repeated
## strings are collapsed so that each affiliation is requested only once
## (drop_duplicates keeps the first occurrence, so the original order is preserved).
affiliation_list = (
    affiliation_df['Affiliations_2016_to_2021']
    .dropna()
    .drop_duplicates()
    .tolist()
)
print(type(affiliation_list))

In [None]:
## Query the OpenCage Geocode API once per affiliation string

geocoder = OpenCageGeocode(geoAPIKey)

## Test list to practice with:
#affiliation_list_2 = ['Harvard University', 'San Jose, CA']

## Result containers
multiple_affiliation_list = []  # one {"item", "longitude", "latitude"} dict per geocoded affiliation
not_found_list = []             # affiliations for which the API returned no result
error_list = []                 # (affiliation, error message) pairs for requests that raised

## Most recent successfully geocoded record (stays empty until the first hit)
single_affiliation_dict = {}

for item in affiliation_list:

    try:
        ## Forward-geocode the affiliation string
        result = geocoder.geocode(item, no_annotations='1')

        if result:
            ## Only the first result returned for the query is used
            geometry = result[0]['geometry']
            ## A fresh dict is built per hit, so no eval() or copy() is needed
            single_affiliation_dict = {
                "item": item,
                "longitude": geometry['lng'],
                "latitude": geometry['lat'],
            }
            multiple_affiliation_list.append(single_affiliation_dict)
            print('Success!')

        else:
            print("not found: ", item)
            not_found_list.append(item)

    except IOError as io_err:
        print(f'IO error occurred: {io_err}')  # Python 3.6
        ## Keep the failed item so it can be retried or inspected later
        error_list.append((item, str(io_err)))

    except Exception as err:
        print(f'Other error occurred: {err}')  # Python 3.6
        error_list.append((item, str(err)))

    finally:
        ## Pause after every request, including failed ones, so that a run of
        ## errors does not hit the API back-to-back
        ## (presumably the one-second gap matches the plan's rate limit; confirm)
        time.sleep(1)
        

In [None]:
## Examine output: the first geocoded records and the affiliations without a match.
## A slice is used instead of a fixed index so the cell also runs when fewer
## than two affiliations were geocoded.
print(multiple_affiliation_list[:2])
print(not_found_list)

In [None]:
## Turn the list of per-affiliation dicts into a pandas DataFrame (one row per dict)
affiliation_geo_df = pd.DataFrame(data=multiple_affiliation_list)

## Preview the first rows
affiliation_geo_df.head(n=5)

In [None]:
## Save the geocoded affiliations to CSV so they can be inspected and reloaded later
test_df = pd.DataFrame(affiliation_geo_df, columns=['item', 'longitude', 'latitude'])

## Passing the path lets pandas open and close the file itself, so no explicit
## open()/close() is needed. The row index is still written as the first,
## unnamed column. The `line_terminator` keyword is left out because pandas 2.0
## no longer accepts it; the default line terminator is used instead.
test_df.to_csv("geo_affiliations.csv", index=True, encoding='utf-8')

In [None]:
## Reload the geocoded affiliations from the CSV this notebook saves as
## "geo_affiliations.csv" (the previous spelling "goe_affiliations.csv" did not
## match the file that is written, so the read failed on a fresh run)
file_path_2 = "geo_affiliations.csv"

## Read the CSV into a DataFrame; the saved row index comes back as an
## 'Unnamed: 0' column, which is discarded
affiliation_coordinate_df = pd.read_csv(file_path_2, encoding="utf-8").drop(['Unnamed: 0'], axis=1)

affiliation_coordinate_df.head()

In [None]:
## Row counts of the source table and of the geocoded table, in that order

for frame in (affiliation_df, affiliation_coordinate_df):
    print(frame.shape[0])

In [None]:
## Attach the coordinates to the affiliation table.
## Inner join on the affiliation string: rows without a geocoded match are left out.

joined_df = (
    affiliation_df
    .merge(
        affiliation_coordinate_df,
        how="inner",
        left_on="Affiliations_2016_to_2021",
        right_on="item",
    )
    .drop(columns=["item"])  # 'item' duplicates the affiliation column after the join
)
joined_df.head()

In [None]:
## Number of rows that survived the merge

print(joined_df.shape[0])

In [None]:
## Save the merged table to CSV and inspect as needed
final_2_df = pd.DataFrame(joined_df)

## Passing the path lets pandas open and close the file itself, so no explicit
## open()/close() is needed. The row index is still written as the first,
## unnamed column. The `line_terminator` keyword is left out because pandas 2.0
## no longer accepts it; the default line terminator is used instead.
final_2_df.to_csv("final_geo_affiliations.csv", index=True, encoding='utf-8')

##columns = ['Affiliations_2016_to_2021','record_count','longitude','latitude']

In [None]:
## Reload the merged affiliations and coordinates from CSV

file_path = "final_geo_affiliations.csv"

## The row index written on save comes back as an 'Unnamed: 0' column; discard it
final_affiliation_df = pd.read_csv(file_path, encoding="utf-8")
final_affiliation_df = final_affiliation_df.drop(columns=['Unnamed: 0'])

final_affiliation_df.head()

In [None]:
## Pull the three plotted columns out of the DataFrame as plain Python lists

longitude, latitude, record_count = (
    final_affiliation_df[column_name].tolist()
    for column_name in ('longitude', 'latitude', 'Record_Count')
)
print(type(longitude))

In [None]:
## Use Cartopy to create symbol map

## Install Cartopy using Conda (easier than pip): https://scitools.org.uk/cartopy/docs/latest/installing.html
## https://coderzcolumn.com/tutorials/data-science/cartopy-basic-maps-scatter-map-bubble-map-and-connection-map


import cartopy.crs as crs
import cartopy.feature as cfeature

fig = plt.figure(figsize=(20, 40))

## Single map panel drawn in the Plate Carree projection
ax = fig.add_subplot(1, 1, 1, projection=crs.PlateCarree())

## Base layers: white coastlines and borders over light grey land
for map_feature, feature_style in (
    (cfeature.COASTLINE, {"color": "white"}),
    (cfeature.LAND, {"color": "lightgrey", "alpha": 0.5}),
    (cfeature.BORDERS, {"linestyle": "-", "color": "white"}),
):
    ax.add_feature(map_feature, **feature_style)
# ax.add_feature(cfeature.STATES)

## One translucent marker per affiliation; the marker size comes from its record count
ax.scatter(
    x=longitude,
    y=latitude,
    color="royalblue",
    s=record_count,
    alpha=0.5,
    transform=crs.PlateCarree(),  ## Important: declares the coordinate system of the input points
)

## Hide the four axes spines
for side in ('top', 'right', 'bottom', 'left'):
    ax.spines[side].set_visible(False)

plt.savefig('coauthors.png')
plt.show()
