### IEMS 394

#### Data Pipeline
Objective: Load and clean data, save as pickle.

In [8]:
%%time
import matplotlib.pyplot as plt
from shapely.geometry import shape, Point, Polygon
import pandas as pd
from tqdm import tqdm
import os, geojson, multiprocessing, datetime, time
import concurrent.futures
# %matplotlib tk

os.chdir('/Users/saifbhatti/Desktop/Northwestern/sy1920/s20/iems394/client-data/cleaned_data')

with open('us_counties_2010.json', encoding = "ISO-8859-1") as f:
    gj = geojson.load(f)
    
    
selected_states = ['TX','CA','MN']

CPU times: user 2.25 s, sys: 144 ms, total: 2.39 s
Wall time: 2.54 s


### Vision (end goals)

- Data Cleaning (python file) 
    - read in appropriate csv files that have been cleaned
    - convert to dataframe
    - pickle
- AMPL szn (python file)
    - unpickle
    - AMPLpy code that uses the dataframes as decision parameters
- Visualisation (ipynb)
    - either 

### Files

Vehicle Data:
- EV_Ranges.csv
- vehicle_ages.csv
- vehicles_available.csv
- trip_purpose_miles.csv
- HI_vehicles.csv
- HEV_model-sales.csv
- PEV_model-sales.csv
- Distance.csv



#### Import and Clean Vehicle Data

In [9]:
df = pd.read_csv('HEV_model-sales.csv',skiprows=1,thousands=',')
df.drop('Total',axis=1,inplace=True)
df.drop(df.index[df.shape[0]-1],inplace=True)
df.set_index(df['Vehicle'],inplace=True)
df.drop('Vehicle',axis=1,inplace=True)
df = df.applymap(lambda x: x.replace('-','0'))
df = df.applymap(lambda x: x.replace(",", ""))
# df = df.applymap(lambda x: x=int(x))
cols=[i for i in df.columns if i not in ['Vehicle']]
for col in cols:
    df[col]=pd.to_numeric(df[col])
hybrid_df = df

In [10]:
hybrid_df['total']= hybrid_df.sum(axis=1) #total sum per column
# hybrid_df['total'].plot(kind='bar',x='name',y='sales volume')
# hybrid_df

In [11]:
df = pd.read_csv('PEV_model-sales.csv',skiprows=1,thousands=',')
df.drop('Total',axis=1,inplace=True)
df.drop(df.index[df.shape[0]-1],inplace=True)
df.set_index(df['Vehicle'],inplace=True)
df.drop('Vehicle',axis=1,inplace=True)
# df.replace(to_replace='-',value=';')
df = df.applymap(lambda x: x.replace('-','0'))
df = df.applymap(lambda x: x.replace(",", ""))
cols=[i for i in df.columns if i not in ['Type']]
for col in cols:
    df[col]=pd.to_numeric(df[col])
electric_df = df

In [12]:
EV_ranges_df =  pd.read_csv('EV_ranges.csv')

![image.png](attachment:image.png)

In [13]:
df = pd.read_csv('electric_fuel_stations.csv')
electric_fuel_df = df
df.head()

Unnamed: 0.1,Unnamed: 0,Fuel Type Code,Station Name,Street Address,City,State,ZIP,Status Code,Groups With Access Code,Latitude,...,ID,Owner Type Code,Open Date,EV Connector Types,Country,Access Code,Facility Type,EV Pricing,EV On-Site Renewable Source,County
0,0,ELEC,City of Sacramento - Capitol Parking Garage,1015 L St,Sacramento,CA,95814,E,Public,38.57838,...,1498,LG,8/20/2018,NEMA520,US,public,PAY_GARAGE,,,Sacramento
1,1,ELEC,LADWP - Truesdale Center,11797 Truesdale St,Sun Valley,CA,91352,E,Private,34.248319,...,1517,LG,10/15/1999,J1772,US,private,UTILITY,,,Los Angeles
2,2,ELEC,Southern California Edison - Rosemead Office B...,2244 Walnut Grove Ave,Rosemead,CA,91770,E,Private,34.050745,...,1520,T,08/01/2011,J1772 NEMA520,US,private,UTILITY,,,Los Angeles
3,3,ELEC,Los Angeles Convention Center,1201 S Figueroa St,Los Angeles,CA,90015,E,Public,34.04057,...,1523,P,8/30/1995,J1772,US,public,PARKING_GARAGE,,,Los Angeles
4,4,ELEC,LADWP - John Ferraro Building,111 N Hope St,Los Angeles,CA,90012,E,Private,34.058476,...,1525,LG,10/15/1999,J1772,US,private,UTILITY,,,Los Angeles


In [14]:
# df['point'] = df['Longitude'] + df['Latitude']
df['Points'] = list(zip(df['Longitude'], df['Latitude']))
# df.tail(20)
df.head()

Unnamed: 0.1,Unnamed: 0,Fuel Type Code,Station Name,Street Address,City,State,ZIP,Status Code,Groups With Access Code,Latitude,...,Owner Type Code,Open Date,EV Connector Types,Country,Access Code,Facility Type,EV Pricing,EV On-Site Renewable Source,County,Points
0,0,ELEC,City of Sacramento - Capitol Parking Garage,1015 L St,Sacramento,CA,95814,E,Public,38.57838,...,LG,8/20/2018,NEMA520,US,public,PAY_GARAGE,,,Sacramento,"(-121.4926, 38.57838020000001)"
1,1,ELEC,LADWP - Truesdale Center,11797 Truesdale St,Sun Valley,CA,91352,E,Private,34.248319,...,LG,10/15/1999,J1772,US,private,UTILITY,,,Los Angeles,"(-118.3879714, 34.24831915)"
2,2,ELEC,Southern California Edison - Rosemead Office B...,2244 Walnut Grove Ave,Rosemead,CA,91770,E,Private,34.050745,...,T,08/01/2011,J1772 NEMA520,US,private,UTILITY,,,Los Angeles,"(-118.081014, 34.050745)"
3,3,ELEC,Los Angeles Convention Center,1201 S Figueroa St,Los Angeles,CA,90015,E,Public,34.04057,...,P,8/30/1995,J1772,US,public,PARKING_GARAGE,,,Los Angeles,"(-118.268762, 34.04057)"
4,4,ELEC,LADWP - John Ferraro Building,111 N Hope St,Los Angeles,CA,90012,E,Private,34.058476,...,LG,10/15/1999,J1772,US,private,UTILITY,,,Los Angeles,"(-118.24819, 34.058476)"


In [15]:
def geo_map(x):
    '''
    geo_map takes pandas Series object constructed in the format (longitude, latitude).
    Taking this as the point, a geo.json file is imported containing all US counties from the 2010 census as polygons.
    It computes whether the point is within a polygon, and returns a list of all matches. 
    If a point is not matched, it returns None.
    '''
    point = Point(x)
    for i in range(len(gj['features'])):
        polygon = shape(gj['features'][i]['geometry'])
        if polygon.contains(point):
            return gj['features'][i]['properties']['NAME']

In [16]:
def geo_two(x):
    '''
    geo_map takes pandas Series object constructed in the format (longitude, latitude).
    Taking this as the point, a geo.json file is imported containing all US counties from the 2010 census as polygons.
    It computes whether the point is within a polygon, and returns a list of all matches. 
    If a point is not matched, it returns None.
    '''
    counties = []
    for i in tqdm(x):
        point = Point(x)
        for i in range(len(gj['features'])):
            polygon = shape(gj['features'][i]['geometry'])
            if polygon.contains(point):
                counties.append(gj['features'][i]['properties']['NAME'])

In [17]:
counties = []
points_list = list(df['Points'])
# for i in tqdm(points_list):
#     counties.append(geo_map(i))

In [18]:
# electric_fuel_df['County'] = counties

In [19]:
# electric_fuel_df.to_csv('electric_fuel_stations.csv')

In [20]:
# electric_fuel_df

In [None]:
electric_df.to_pickle("./dummy.pkl")

### Convert data from zipcode to county.

In [21]:
tx_ev_vehicle_registered = pd.read_csv('vehicle_reg/tx_ev_registrations_public.csv')
print(len(tx_ev_vehicle_registered['ZIP Code'].unique()))
print(len(tx_ev_vehicle_registered))
tx_ev_vehicle_registered.head()

1325
74143


Unnamed: 0,ZIP Code,DMV ID,Vehicle Name,Registration Valid Date
0,3106,1,Tesla Model S,03/06/2019
1,3106,1,Tesla Model S,03/06/2019
2,3106,1,Tesla Model S,03/06/2019
3,3106,1,Tesla Model S,03/06/2019
4,3106,1,Tesla Model S,03/06/2019


In [22]:
mn_ev_vehicle_registered = pd.read_csv('vehicle_reg/mn_ev_registrations_public.csv')
mn_ev_vehicle_registered.drop(['VIN Prefix','VIN Model Year'],inplace=True,axis=1)
print(len(mn_ev_vehicle_registered['ZIP Code'].unique()))
print(len(mn_ev_vehicle_registered))
mn_ev_vehicle_registered.head()

600
29248


Unnamed: 0,DMV ID,ZIP Code,Registration Expiration Date,Registration Valid Date
0,6,55347.0,01/12/2020,30/01/2020
1,6,55432.0,01/12/2020,30/01/2020
2,6,55330.0,01/07/2020,30/01/2020
3,6,56377.0,01/04/2020,30/01/2020
4,6,55109.0,01/12/2020,30/01/2020


In [24]:
result = pd.concat([mn_ev_vehicle_registered, tx_ev_vehicle_registered], axis=0)
print(len(result['ZIP Code'].unique()))
print(result.shape)
result.head()

1906
(103391, 5)


Unnamed: 0,DMV ID,ZIP Code,Registration Expiration Date,Registration Valid Date,Vehicle Name
0,6,55347.0,01/12/2020,30/01/2020,
1,6,55432.0,01/12/2020,30/01/2020,
2,6,55330.0,01/07/2020,30/01/2020,
3,6,56377.0,01/04/2020,30/01/2020,
4,6,55109.0,01/12/2020,30/01/2020,


In [25]:
us_zips = pd.read_csv('uszips.csv')
print(us_zips.shape)
us_zips.head()

(33099, 18)


Unnamed: 0,zip,lat,lng,city,state_id,state_name,zcta,parent_zcta,population,density,county_fips,county_name,county_weights,county_names_all,county_fips_all,imprecise,military,timezone
0,601,18.18004,-66.75218,Adjuntas,PR,Puerto Rico,True,,17242,111.4,72001,Adjuntas,"{'72001':99.43,'72141':0.57}",Adjuntas|Utuado,72001|72141,False,False,America/Puerto_Rico
1,602,18.36073,-67.17517,Aguada,PR,Puerto Rico,True,,38442,523.5,72003,Aguada,{'72003':100},Aguada,72003,False,False,America/Puerto_Rico
2,603,18.45439,-67.12202,Aguadilla,PR,Puerto Rico,True,,48814,667.9,72005,Aguadilla,{'72005':100},Aguadilla,72005,False,False,America/Puerto_Rico
3,606,18.16724,-66.93828,Maricao,PR,Puerto Rico,True,,6437,60.4,72093,Maricao,"{'72093':94.88,'72121':1.35,'72153':3.78}",Maricao|Yauco|Sabana Grande,72093|72153|72121,False,False,America/Puerto_Rico
4,610,18.29032,-67.12243,Anasco,PR,Puerto Rico,True,,27073,312.0,72011,Añasco,"{'72003':0.55,'72011':99.45}",Añasco|Aguada,72011|72003,False,False,America/Puerto_Rico


In [26]:
# len(us_zips[(us_zips['zip'] > 70000) & (us_zips['zip'] < 90000)])

In [27]:
us_zips.columns

Index(['zip', 'lat', 'lng', 'city', 'state_id', 'state_name', 'zcta',
       'parent_zcta', 'population', 'density', 'county_fips', 'county_name',
       'county_weights', 'county_names_all', 'county_fips_all', 'imprecise',
       'military', 'timezone'],
      dtype='object')

In [28]:
new = pd.merge(result, us_zips, how='inner', left_on='ZIP Code', right_on='zip')

In [29]:
new['state_id'].unique()

array(['MN', 'CA', 'OH', 'MI', 'WI', 'IL', 'OR', 'MO', 'TX', 'NJ', 'AZ',
       'CT', 'NE', 'FL', 'ID', 'CO', 'MD', 'NY', 'VA', 'IA', 'WY', 'MA',
       'GA', 'WA', 'NH', 'PA', 'DC', 'NC', 'IN', 'KS', 'LA', 'OK', 'UT',
       'NM', 'NV', 'KY', 'SC', 'AR', 'AK'], dtype=object)

In [30]:
len(new[(new['state_id'] == 'TX') | (new['state_id'] == 'MN')])

97888

In [31]:
new.shape[0]

101278

In [32]:
new = new[(new['state_id'] == 'TX') | (new['state_id'] == 'MN')]
new['state_id'].unique()

array(['MN', 'TX'], dtype=object)

In [33]:
new.columns

Index(['DMV ID', 'ZIP Code', 'Registration Expiration Date',
       'Registration Valid Date', 'Vehicle Name', 'zip', 'lat', 'lng', 'city',
       'state_id', 'state_name', 'zcta', 'parent_zcta', 'population',
       'density', 'county_fips', 'county_name', 'county_weights',
       'county_names_all', 'county_fips_all', 'imprecise', 'military',
       'timezone'],
      dtype='object')

In [34]:
new.groupby('state_id').count()

Unnamed: 0_level_0,DMV ID,ZIP Code,Registration Expiration Date,Registration Valid Date,Vehicle Name,zip,lat,lng,city,state_name,...,population,density,county_fips,county_name,county_weights,county_names_all,county_fips_all,imprecise,military,timezone
state_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
MN,29051,29051,29045,29048,6,29051,29051,29051,29051,29051,...,29051,29051,29051,29051,29051,29051,29051,29051,29051,29051
TX,68837,68837,4,68837,68833,68837,68837,68837,68837,68837,...,68837,68837,68837,68837,68837,68837,68837,68837,68837,68837


In [35]:
new.to_csv('ev_registration_county.csv')

In [36]:
cali = pd.read_csv('california_car_data.csv')
cali['Fuel'].unique()

  interactivity=interactivity, compiler=compiler, result=result)


array(['Gasoline', 'Diesel and Diesel Hybrid', 'Battery Electric',
       'Other', 'Flex-Fuel', 'Hybrid Gasoline', 'Natural Gas',
       'Plug-in Hybrid', 'Hydrogen Fuel Cell'], dtype=object)

In [37]:
cali.head()

Unnamed: 0,Date,Zip Code,Model Year,Fuel,Make,Duty,Vehicles
0,10/1/2018,90000,2006,Gasoline,OTHER/UNK,Light,1
1,10/1/2018,90000,2014,Gasoline,OTHER/UNK,Light,1
2,10/1/2018,90000,2016,Gasoline,OTHER/UNK,Light,1
3,10/1/2018,90000,2017,Gasoline,OTHER/UNK,Light,1
4,10/1/2018,90000,<2006,Diesel and Diesel Hybrid,OTHER/UNK,Heavy,55


Within the `IRS Individual Tax Income` dataset, the following columns are extracted:
- `STATE`: the relevant state the form was filed in.
- `zipcode`: the relevant zipcode the form was filed in.
- `NI`: Number of Returns.
- `A02650`: Total Income Amount.

Run `!curl https://www.irs.gov/pub/irs-soi/17zpallagi.csv -o irs.csv` to download.

In [38]:
irs = pd.read_csv('irs.csv') #read in the csv
irs = irs[['STATE','zipcode','A02650','N1']] #only keep a few columns (detailed above)
irs = irs.rename(columns={'A02650': 'total_income_amt', 'N1': 'return_count'})
irs.columns

Index(['STATE', 'zipcode', 'total_income_amt', 'return_count'], dtype='object')

In [39]:
irs['avg_income_amt'] = irs['total_income_amt'] / irs['return_count']

In [40]:
ranked_irs = irs.groupby('STATE').sum()
ranked_irs.sort_values(by='avg_income_amt', ascending=False).head()

Unnamed: 0_level_0,zipcode,total_income_amt,return_count,avg_income_amt
STATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
TX,751356162,1834322000.0,24686880.0,1090974.0
CA,828823507,3131749000.0,35656240.0,1078311.0
NY,117768024,1726421000.0,19164470.0,946321.3
PA,141540136,869460800.0,12340460.0,783703.3
FL,183958530,1497832000.0,19986410.0,756466.1


In [41]:
irs = irs[irs['STATE'].isin(selected_states)] #keep only relevant states
irs.groupby('STATE').sum()

Unnamed: 0_level_0,zipcode,total_income_amt,return_count,avg_income_amt
STATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CA,828823507,3131749000.0,35656240.0,1078311.0
MN,265321122,422084000.0,5484850.0,442639.2
TX,751356162,1834322000.0,24686880.0,1090974.0


In [57]:
e85 = pd.read_csv('e85_fuel_stations.csv')

In [58]:
e85['Points'] = list(zip(e85['Longitude'], e85['Latitude']))
e85

Unnamed: 0.1,Unnamed: 0,Fuel Type Code,Station Name,City,State,ZIP,Latitude,Longitude,ID,E85 Blender Pump,Points,County
0,0,E85,Sioux Valley Co-op - Cenex,Watertown,SD,57201,44.904113,-97.130798,1447,True,"(-97.130798, 44.904113)",Codington
1,1,E85,F&M Co-op Oil,Madison,SD,57042,44.007690,-97.146785,1451,True,"(-97.146785, 44.00769)",Lake
2,2,E85,Kum & Go #113,Ames,IA,50010,42.034894,-93.575925,1470,True,"(-93.575925, 42.034894)",Story
3,3,E85,Texaco - Highway 34 Truckstop,West Burlington,IA,52655,40.825493,-91.208355,1479,False,"(-91.208355, 40.825493)",Des Moines
4,4,E85,Conoco - Convenient Food Mart,Jefferson City,MO,65109,38.588189,-92.255887,1480,False,"(-92.255887, 38.588189)",Cole
...,...,...,...,...,...,...,...,...,...,...,...,...
3556,3556,E85,76 - Foothill Petroleum Inc.,La Crescenta,CA,91214,34.232154,-118.253509,156016,False,"(-118.253509, 34.232154)",Los Angeles
3557,3557,E85,Conoco - 130 Truck Stop,Cinnaminson,NJ,8077,40.006900,-74.975100,156023,False,"(-74.9751, 40.0069)",Burlington
3558,3558,E85,BP,Fort Mill,SC,29708,35.030458,-80.966548,156024,False,"(-80.966548, 35.030458)",York
3559,3559,E85,Rutter's,Inwood,WV,25428,39.404224,-78.016428,156025,False,"(-78.01642778, 39.40422416)",Berkeley


In [44]:
# counties = []
# points_list = list(e85['Points'])
# for i in tqdm(points_list): 
#     counties.append(geo_map(i))
# e85['County'] = counties
# e85.to_csv('e85_fuel_stations.csv')

100%|██████████| 3561/3561 [04:20<00:00, 13.66it/s]


In [59]:
e85.groupby('County').count().sort_values(by='City', ascending=False).head(5)

Unnamed: 0_level_0,Unnamed: 0,Fuel Type Code,Station Name,City,State,ZIP,Latitude,Longitude,ID,E85 Blender Pump,Points
County,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Montgomery,56,56,56,56,56,56,56,56,56,56,56
Hennepin,54,54,54,54,54,54,54,54,54,54,54
Polk,47,47,47,47,47,47,47,47,47,47,47
Harris,47,47,47,47,47,47,47,47,47,47,47
Cook,45,45,45,45,45,45,45,45,45,45,45


In [None]:
# e85[e85['County']=="Los Angeles"]
e85.groupby('State').count().sort_values(by='Fuel Type Code', ascending=False).head(10)

In [50]:
centroids = pd.read_csv('county_centroids.csv')

In [51]:
centroids['Latitude'] = centroids['Latitude'].apply(lambda x: x.strip('°'))
centroids['Longitude'] = centroids['Longitude'].apply(lambda x: x.strip('°'))
centroids['Longitude'] = centroids['Longitude'].apply(lambda x: x.replace('–','-'))
centroids['Latitude'] = centroids['Latitude'].apply(lambda x: x.replace('+',''))

In [53]:
centroids['Points'] = list(zip(centroids['Longitude'], centroids['Latitude']))

In [63]:
selected_centroids = centroids[centroids['State'].isin(selected_states)]

In [64]:
selected_centroids

Unnamed: 0,State,FIPS,County,County Seat,Unnamed: 4,Land Area kmsq,Land Area misq,Total Area kmsq,Total Area misq,Latitude,Longitude,Points
186,CA,6001,Alameda,Oakland,1510271,1914.05,739.017,2127.23,821.328,37.648081,-121.913304,"(-121.913304, 37.648081)"
187,CA,6003,Alpine,Markleeville,1175,1912.27,738.332,1924.83,743.18,38.617610,-119.798999,"(-119.798999, 38.617610)"
188,CA,6005,Amador,Jackson,38091,1539.96,594.583,1569.42,605.956,38.443550,-120.653856,"(-120.653856, 38.443550)"
189,CA,6007,Butte,Oroville,220000,4238.42,1636.46,4343.75,1677.13,39.665959,-121.601919,"(-121.601919, 39.665959)"
190,CA,6009,Calaveras,San Andreas,45578,2641.82,1020.01,2685.63,1036.93,38.187844,-120.555115,"(-120.555115, 38.187844)"
...,...,...,...,...,...,...,...,...,...,...,...,...
2772,TX,48499,Wood,Quitman,41964,1671.15,645.234,1801.90,695.718,32.783588,-95.382166,"(-95.382166, 32.783588)"
2773,TX,48501,Yoakum,Plains,7879,2071.24,799.708,2071.27,799.721,33.172397,-102.823771,"(-102.823771, 33.172397)"
2774,TX,48503,Young,Graham,18550,2368.46,914.468,2410.91,930.859,33.158787,-98.678267,"(-98.678267, 33.158787)"
2775,TX,48505,Zapata,Zapata,14018,2585.88,998.412,2740.25,1058.02,26.996981,-99.182603,"(-99.182603, 26.996981)"


stuff to do today:
    
- focus on 394, clean up this file, solve the problem, make a new document
- then focus on renoster, create a list of tasks we have to do. pick 1 simple format and stick to it.

In [65]:
def create_circles(x,rad):
    '''
    county_scanner takes pandas Series object constructed in the format (longitude, latitude).
    Taking a point (which is the fuelling station) and constructs a circle of radius 'rad'.
    This is stored to a geojson file.
    '''
    point = Point(x)
    return point.buffer(rad)
    

In [None]:
from shapely import geometry

# create your two points
point_1 = geometry.Point(37.774929,-122.419418)
point_2 = geometry.Point(37.774929,-122.419418)

# create your circle buffer from one of the points
distance = 1000
circle_buffer = point_1.buffer(distance)

# and you can then check if the other point lies within
if point_2.within(circle_buffer):
    print('point 2 is within the distance buffer of point 1')
# or similarly
if circle_buffer.contains(point_2):
    print('circle buffer contains point 2')

# but a simpler method is to simply check the distance
if point_1.distance(point_2) < distance:
    print('point 1 is within the distance of point 2')

In [None]:
def geo_map(x):
    '''
    geo_map takes pandas Series object constructed in the format (longitude, latitude).
    Taking this as the point, a geo.json file is imported containing all US counties from the 2010 census as polygons.
    It computes whether the point is within a polygon, and returns a list of all matches. 
    If a point is not matched, it returns None.
    '''
    point = Point(x)
    for i in range(len(gj['features'])):
        polygon = shape(gj['features'][i]['geometry'])
        if polygon.contains(point):
            return gj['features'][i]['properties']['NAME']

In [None]:
from functools import partial
import pyproj
from shapely.ops import transform
from shapely.geometry import Point

proj_wgs84 = pyproj.Proj(init='epsg:4326')


def geodesic_point_buffer(lat, lon, km):
    # Azimuthal equidistant projection
    aeqd_proj = '+proj=aeqd +lat_0={lat} +lon_0={lon} +x_0=0 +y_0=0'
    project = partial(pyproj.transform,
                        pyproj.Proj(aeqd_proj.format(lat=lon, lon=lat)),
                        proj_wgs84)
    buf = Point(0, 0).buffer(km * 1000)  # distance in metres
    return transform(project, buf).exterior.coords[:]

# Example
b = geodesic_point_buffer(37.774929,-122.419418, 100.0)

b

In [5]:
san_fran = Point(37.774929,-122.419418)

In [6]:
if b.contains(san_fran):
    print('yes')
else:
    print('no :(')

no :(


In [4]:
import pyproj as proj

# setup your projections
crs_wgs = proj.Proj(init='epsg:4326') # assuming you're using WGS84 geographic

# then cast your geographic coordinate pair to the projected system
x, y = proj.transform(crs_wgs,crs_wgs,-122.41,37.77)



In [5]:
from shapely import geometry

# create your two points
point_1 = geometry.Point(37.774929,-122.419418)
point_2 = geometry.Point(37.774929,-122.419418)

# create your circle buffer from one of the points
distance = 1000
circle_buffer = point_1.buffer(distance)

# and you can then check if the other point lies within
if point_2.within(circle_buffer):
    print('point 2 is within the distance buffer of point 1')
# or similarly
if circle_buffer.contains(point_2):
    print('circle buffer contains point 2')

# but a simpler method is to simply check the distance
if point_1.distance(point_2) < distance:
    print('point 1 is within the distance of point 2')

point 2 is within the distance buffer of point 1
circle buffer contains point 2
point 1 is within the distance of point 2


In [67]:
type(circle_buffer)

shapely.geometry.polygon.Polygon