In [None]:
# This processes output csv files from ArcGIS into files that are injested into the pipeline.
# Specifically, read in ArcGIS csv files, extract relevant coordinates, and them write out to
#     another, 'processed' csv with additional info in a header

In [6]:
import ephem
import cartopy.crs as ccrs
import skyfield
import pandas as pd
import numpy as np
import ipdb
import matplotlib.pyplot as plt

%matplotlib qt

In [2]:
# read in raw file as written out by ArcGIS
df = pd.read_csv("./data/raw_gis_output/Export_Output_ATA_equid_50000.txt")

In [3]:
df

Unnamed: 0,OBJECTID,Join_Count,TARGET_FID,Id,X_CEA,Y_CEA,GID_0,NAME_0,LON,LAT
0,1,0,0,0,,0,,,,
1,2,1,1,0,-1.996251e+07,-19962507,ATA,Antarctica,-179.326252,-84.942232
2,3,1,2,0,-1.991251e+07,-19912507,ATA,Antarctica,-178.877094,-84.942232
3,4,1,3,0,-1.986251e+07,-19862507,ATA,Antarctica,-178.427937,-84.942232
4,5,1,4,0,-1.981251e+07,-19812507,ATA,Antarctica,-177.978779,-84.942232
...,...,...,...,...,...,...,...,...,...,...
14431,14432,0,14431,0,1.983749e+07,19837493,,,178.203231,-59.710907
14432,14433,0,14432,0,1.988749e+07,19887493,,,178.652389,-59.710907
14433,14434,0,14433,0,1.993749e+07,19937493,,,179.101546,-59.710907
14434,14435,0,14434,0,1.998749e+07,19987493,,,179.550704,-59.710907


In [17]:
# apply cuts

preserve_df_1 = df.where(df["Join_Count"] == 1).dropna(how="all")
preserve_df_2 = preserve_df_1.where(preserve_df_1["LAT"] < -60.).dropna(how="all")
preserve_df_final = preserve_df_2.where(np.logical_and(preserve_df_1["LON"] < -20.,preserve_df_1["LON"] > -80.)).dropna(how="all")

In [18]:
plt.hist(preserve_df_final["LON"])
plt.show()

In [21]:
preserve_df_final

Unnamed: 0,OBJECTID,Join_Count,TARGET_FID,Id,X_CEA,Y_CEA,GID_0,NAME_0,LON,LAT
223,224.0,1.0,223.0,0.0,-8.862507e+06,-8862507.0,ATA,Antarctica,-79.613256,-84.942232
224,225.0,1.0,224.0,0.0,-8.812507e+06,-8812507.0,ATA,Antarctica,-79.164098,-84.942232
225,226.0,1.0,225.0,0.0,-8.762507e+06,-8762507.0,ATA,Antarctica,-78.714940,-84.942232
226,227.0,1.0,226.0,0.0,-8.712507e+06,-8712507.0,ATA,Antarctica,-78.265783,-84.942232
227,228.0,1.0,227.0,0.0,-8.662507e+06,-8662507.0,ATA,Antarctica,-77.816625,-84.942232
...,...,...,...,...,...,...,...,...,...,...
10699,10700.0,1.0,10699.0,0.0,-6.362507e+06,-6362507.0,ATA,Antarctica,-57.155373,-63.474308
10702,10703.0,1.0,10702.0,0.0,-6.212507e+06,-6212507.0,ATA,Antarctica,-55.807901,-63.474308
11495,11496.0,1.0,11495.0,0.0,-6.662507e+06,-6662507.0,ATA,Antarctica,-59.850319,-62.489439
13130,13131.0,1.0,13130.0,0.0,-5.112507e+06,-5112507.0,ATA,Antarctica,-45.926432,-60.610574


In [26]:
plt.clf()

ax = plt.axes(projection=ccrs.PlateCarree())
#ax = plt.axes(projection=ccrs.RotatedPole())

ax.scatter(preserve_df_final["LON"], preserve_df_final["LAT"], color="red", transform=ccrs.PlateCarree())
#ax.scatter(preserve_df_final["LON"], preserve_df_final["LAT"], color="red", transform=ccrs.RotatedPole())

ax.coastlines()
ax.set_global()

# Save the plot by calling plt.savefig() BEFORE plt.show()
#plt.savefig('coastlines.pdf')
#plt.savefig('coastlines.png')

plt.show()

In [29]:
# write out coordinates file with a header

technical_string = "brit_antarctica"
processed_file_name = technical_string+".csv"
hdr = open(processed_file_name, "w")

hdr.write('KEY;VALUE;COMMENT;REF\n')
hdr.write('technical_string;'+technical_string+';;\n')
hdr.write('display_string;British Antarctic Territory;;\n')
hdr.write('start_date;1962 March 3;Statutory Instrument - the British Antarctic Territory Order in Council 1962/400;britishantarcticterritory.org.uk\n')
hdr.write('end_date;-99999;;\n')
hdr.write('other;;;\n')

hdr.write('##############################\n')

preserve_df_final.to_csv(hdr, columns=["LAT","LON"], index=False)
print("Wrote out " + processed_file_name)

Wrote out brit_antarctica.csv
