# Candidate Locations with Combined NAICS + 40209

This document generates a new candidate location file from the selected manufacturing and recycling locations, combining the NAICS and 40209 locations. Steps (applied to both the manufacturing cost locations and the recycling cost locations):
1. Set up the format of the candidate locations dataframe.
    - Manufacturing cost NAICS.
    - Manufacturing cost 40209.
    - Recycling cost NAICS.
    - Recycling cost 40209.
2. Load the necessary files.
3. Extract the locations with their latitudes and longitudes.
4. Move the extracted columns into the candidate locations dataframe.
5. Map the locations to their area cost factors.

In [25]:
import numpy as np
import pandas as pd
import os,sys
import matplotlib.pyplot as plt
from pathlib import Path

from geopy.geocoders import Nominatim
from geopy.point import Point
# Initialize the Nominatim geocoding client (geopy).
# NOTE(review): `geolocator` is not referenced by any other cell visible in this
# notebook — confirm whether it and the geopy imports are still needed.
# NOTE(review): "geoapiExercises" looks like a tutorial user agent; Nominatim's usage
# policy presumably expects an application-specific value — TODO confirm.
geolocator = Nominatim(user_agent="geoapiExercises")

## 1. Dataframe formats

In [26]:
# Column layout of the candidate-location table, in output order.
column_names = [
    'name',
    'latitude (deg)',
    'longitude (deg)',
    'initial capacity (tonne)',
    'area cost factor',
]

In [27]:
# Empty frame carrying only the candidate-location columns; it is copied once per
# dataset (manufacturing/recycling x NAICS/40209) further down.
cl_template = pd.DataFrame(columns=column_names)

In [28]:
# Directory the kernel is running in; the input CSV paths below are built relative to it.
cwd = os.getcwd()

Load area cost factors

In [29]:
# Bare expression: shows the working directory as the cell output.
cwd

'/Users/hwikoff/Documents/RICE/3_RELOG_simulation'

In [30]:
# Area cost factor table, read from <cwd>/resources/dod_area_cost_factors.csv.
area_cost_factors = pd.read_csv(Path(cwd) / 'resources' / 'dod_area_cost_factors.csv')

## 2. Load the output files

In [31]:
def _load_plants(scenario):
    """Read the plants.csv stored under resources/output/before_mac_left/<scenario>."""
    return pd.read_csv(
        os.path.join(cwd, 'resources', 'output', 'before_mac_left', scenario, 'plants.csv')
    )


# Manufacturing scenarios.
man_NAICS_raw = _load_plants('Manufacturing_NAICS_1')
man_40209_raw = _load_plants('Manufacturing_40209_1')

# Recycling scenarios.
rec_NAICS_raw = _load_plants('Recycling_NAICS_1')
rec_40209_raw = _load_plants('Recycling_40209_1')


In [32]:
# Report the number of distinct location names in each raw plants table.
for label, plants in (
    ('NAICS Manufacturing', man_NAICS_raw),
    ('40209 Manufacturing', man_40209_raw),
    ('NAICS Recycling', rec_NAICS_raw),
    ('40209 Recycling', rec_40209_raw),
):
    n_locations = len(plants['location name'].unique())
    print(f'The {label} plants have {n_locations} locations.')

The NAICS Manufacturing plants have 62 locations.
The 40209 Manufacturing plants have 62 locations.
The NAICS Recycling plants have 81 locations.
The 40209 Recycling plants have 81 locations.


In [33]:
# Keep only the rows of year 1. reset_index() renumbers the rows from 0 and keeps the
# previous row labels in an extra 'index' column.
man_NAICS = man_NAICS_raw[man_NAICS_raw['year'].eq(1)].reset_index()
man_40209 = man_40209_raw[man_40209_raw['year'].eq(1)].reset_index()

rec_NAICS = rec_NAICS_raw[rec_NAICS_raw['year'].eq(1)].reset_index()
rec_40209 = rec_40209_raw[rec_40209_raw['year'].eq(1)].reset_index()

## 3. Extract the locations with their latitudes and longitudes

Extract the state from each location name so that it can be mapped to an area cost factor.

In [34]:
# NAICS tables: the state key is the last two characters of the location name.
for plants in (man_NAICS, rec_NAICS):
    plants['state'] = plants['location name'].str[-2:]

# 40209 tables: the state key is whatever follows the last ', ' in the location name.
for plants in (man_40209, rec_40209):
    plants['state'] = plants['location name'].str.rsplit(', ', n=1).str[-1]

## 4. Move necessary columns and map with area cost factors

Create one empty copy of the template for each dataset.

In [35]:
# One independent, empty copy of the template per dataset.
cl_man_NAICS, cl_man_40209, cl_rec_NAICS, cl_rec_40209 = (
    cl_template.copy() for _ in range(4)
)

Look up the Ohio area cost factor, which is used below to normalize all other factors.

In [36]:
# Show the 'ACF Official' entry of the row(s) whose State is Ohio.
area_cost_factors.loc[area_cost_factors['State'].eq('Ohio'), 'ACF Official']

35    0.94
Name: ACF Official, dtype: float64

In [37]:
# Official ACF of Ohio; the area cost factors below are divided by it.
# Taken by position (.iloc[0]) instead of the hard-coded row label 35, so the lookup
# keeps working if the CSV row order or index changes.
ohio_code = area_cost_factors.loc[area_cost_factors['State'] == 'Ohio', 'ACF Official'].iloc[0]

Move the columns and map each state to its area cost factor.

In [38]:
def _build_candidate_locations(plants, acf_by_state, reference_acf):
    """Assemble the candidate-location table for one set of selected plants.

    Parameters
    ----------
    plants : pd.DataFrame
        Must provide the 'location name', 'latitude (deg)', 'longitude (deg)' and
        'state' columns.
    acf_by_state : pd.Series
        'ACF Official' values indexed by the same kind of key stored in
        ``plants['state']``.
    reference_acf : float
        Value every mapped area cost factor is divided by.

    Returns
    -------
    pd.DataFrame
        Columns 'name', 'latitude (deg)', 'longitude (deg)',
        'initial capacity (tonne)' (always 0), 'area cost factor' and 'state',
        on the same index as ``plants``.
    """
    import warnings

    mapped_acf = plants['state'].map(acf_by_state)

    # .map() leaves NaN where a state has no match; surface that instead of letting
    # empty area cost factors flow silently into the exported CSV.
    unmatched = plants.loc[mapped_acf.isna(), 'state'].unique()
    if len(unmatched) > 0:
        warnings.warn(
            'No area cost factor found for state value(s): {}'.format(list(unmatched))
        )

    return pd.DataFrame({
        'name': plants['location name'],
        'latitude (deg)': plants['latitude (deg)'],
        'longitude (deg)': plants['longitude (deg)'],
        'initial capacity (tonne)': 0,
        'area cost factor': mapped_acf / reference_acf,
        'state': plants['state'],
    })


# NAICS tables are matched on the 'State Code' column, 40209 tables on the 'State' column.
acf_by_state_code = area_cost_factors.set_index('State Code')['ACF Official']
acf_by_state_name = area_cost_factors.set_index('State')['ACF Official']

cl_man_NAICS = _build_candidate_locations(man_NAICS, acf_by_state_code, ohio_code)
cl_man_40209 = _build_candidate_locations(man_40209, acf_by_state_name, ohio_code)

cl_rec_NAICS = _build_candidate_locations(rec_NAICS, acf_by_state_code, ohio_code)
cl_rec_40209 = _build_candidate_locations(rec_40209, acf_by_state_name, ohio_code)


## 5. Join the NAICS with 40209

### 5.1. No discount

In [39]:
# Stack the NAICS rows on top of the 40209 rows and renumber the index from 0.
files_naics_40209_man_list = [cl_man_NAICS, cl_man_40209]
files_naics_40209_man = pd.concat(
    objs=files_naics_40209_man_list,
    axis=0,
    ignore_index=True,
)

In [40]:
# Stack the NAICS rows on top of the 40209 rows and renumber the index from 0.
files_naics_40209_rec_list = [cl_rec_NAICS, cl_rec_40209]
files_naics_40209_rec = pd.concat(
    objs=files_naics_40209_rec_list,
    axis=0,
    ignore_index=True,
)

In [41]:
# Parent of the current working directory: "<cwd>/.." collapsed by normpath.
previous_folder = os.path.normpath(os.sep.join([os.getcwd(), os.pardir]))

Drop the state column.

In [42]:
# Remove the helper 'state' column before export.
# Reassigning with errors='ignore' (instead of an in-place drop) keeps this cell
# re-runnable: a second run no longer raises KeyError because 'state' is already gone.
files_naics_40209_man = files_naics_40209_man.drop(columns='state', errors='ignore')
files_naics_40209_rec = files_naics_40209_rec.drop(columns='state', errors='ignore')

In [43]:
# Preview the combined manufacturing candidates (NAICS rows first, then the 40209 rows).
files_naics_40209_man

Unnamed: 0,name,latitude (deg),longitude (deg),initial capacity (tonne),area cost factor
0,"Sandvik, Inc., FL",30.000000,-81.000000,0,0.914894
1,"Flex-Tec, Inc., GA",32.000000,-84.000000,0,0.925532
2,"Kuykendall Cement Corp., AR",35.000000,-92.000000,0,0.787234
3,"Baldor Electric Co., AR",35.000000,-93.000000,0,0.787234
4,"Carolina Graphics, SC",33.000000,-81.000000,0,0.936170
...,...,...,...,...,...
119,"Big Brown, Texas",31.701749,-96.144957,0,0.936170
120,"Chesterfield, Virginia",37.378434,-77.585847,0,0.872340
121,"Jefferies, South Carolina",33.207700,-79.953655,0,0.936170
122,"Henderson I, Kentucky",37.792542,-87.572577,0,0.914894


In [44]:
# Folder for the candidate-location files, next to the current working directory.
candidate_locations_dir = Path(previous_folder, '2_data_preparation', 'RELOG_import_data', 'CandidateLocations')

# Write the undiscounted tables without the row index.
files_naics_40209_man.to_csv(
    candidate_locations_dir / 'cl_selected_man_naics_40209_together_no_discount.csv',
    index=False,
)
files_naics_40209_rec.to_csv(
    candidate_locations_dir / 'cl_selected_rec_naics_40209_together_no_discount.csv',
    index=False,
)

### 5.2. Discount

In [45]:
# Fraction removed from every area cost factor: the factors are multiplied by (1 - discount).
discount = 0.1 # Change this value for specific discount

In [46]:
# Work on copies so the undiscounted tables stay untouched.
files_naics_40209_man_discount, files_naics_40209_rec_discount = (
    frame.copy() for frame in (files_naics_40209_man, files_naics_40209_rec)
)

In [47]:
# Scale the area cost factors by (1 - discount).
# The right-hand side reads from the undiscounted frames, so re-running this cell gives
# the same result instead of compounding the discount on already-discounted values.
files_naics_40209_man_discount['area cost factor'] = files_naics_40209_man['area cost factor'] * (1 - discount)
files_naics_40209_rec_discount['area cost factor'] = files_naics_40209_rec['area cost factor'] * (1 - discount)

In [48]:
# Folder for the candidate-location files, next to the current working directory.
candidate_locations_dir = Path(previous_folder, '2_data_preparation', 'RELOG_import_data', 'CandidateLocations')

# Write the discounted tables without the row index.
files_naics_40209_man_discount.to_csv(
    candidate_locations_dir / 'cl_man_selected_naics_40209_together_discount.csv',
    index=False,
)
files_naics_40209_rec_discount.to_csv(
    candidate_locations_dir / 'cl_rec_selected_naics_40209_together_discount.csv',
    index=False,
)