In [21]:
import pandas as pd
import numpy as np

I want to use the values in the `site_area_geo_coord1` dataframe to map the site locations for which the soil emissions need to be predicted, i.e., the NSA and SSA sites at the Old Black Spruce (OBS) location. First, the data is read and cleaned; then the function below is called to select the required columns and rows from the dataframe, and the null values in the resulting table are counted. After the null values are filled, the dataframe is saved in .csv format so that it can be used in Tableau to plot the coordinates of the sites.

### Reading and Cleaning the data

In [22]:
# Read the site/area geographic-coordinate table from CSV.
# Only the comma separator is passed: supplying both `sep` and `delimiter`
# (as this cell previously did) raises ValueError in current pandas.
# quotechar stays "'" so any double quotes in the file remain in the values
# and are stripped afterwards by Clean_data.
site_area_geo_coord1_data = pd.read_csv('data/site_area_geo_coord1.csv', sep=",", quotechar="'")

In [23]:
def Clean_data(data_frame):
    """Strip stray quote and bracket characters from a dataframe, in place.

    For every object-dtype column, leading/trailing double quotes are removed
    first, then any leading/trailing ``[``, ``]`` or ``'`` characters.
    Leading/trailing double quotes are also removed from the column headers.

    Parameters
    ----------
    data_frame : pandas.DataFrame
        Frame to clean. It is modified in place by this call.

    Returns
    -------
    pandas.DataFrame
        ``data_frame.head()`` of the cleaned frame. As a side effect, a
        message and ``data_frame.info()`` are printed.
    """
    # "object" replaces the ``np.object`` alias, which was deprecated in
    # NumPy 1.20 and removed in NumPy 1.24 (AttributeError on current NumPy).
    for col in data_frame.select_dtypes(include="object"):
        # Order matters: double quotes are stripped first, then brackets and
        # apostrophes, exactly as the two original passes did.
        data_frame[col] = data_frame[col].str.strip('"').str.strip("['']")

    # Remove the quotes around each header; non-string labels are kept as-is
    # instead of failing on a missing ``.strip``.
    data_frame.columns = [
        header.strip('"') if isinstance(header, str) else header
        for header in data_frame.columns
    ]

    print("The dataframe information and the dataframe head is as given: ")
    data_frame.info()
    return data_frame.head()

In [24]:
# Clean the raw site table in place; prints its .info() and displays the first rows.
Clean_data(site_area_geo_coord1_data)

The dataframe information and the dataframe head is as given: 
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2080 entries, 0 to 2079
Data columns (total 27 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   SITE_ID              2080 non-null   object 
 1   OP_GRID_ID           1347 non-null   object 
 2   BOREAS_X             1873 non-null   float64
 3   BOREAS_Y             1874 non-null   float64
 4   LATITUDE             2007 non-null   float64
 5   SDEV_LATITUDE        1174 non-null   float64
 6   LONGITUDE            2007 non-null   float64
 7   SDEV_LONGITUDE       1174 non-null   float64
 8   UTM_ZONE             1989 non-null   float64
 9   UTM_EASTING          1877 non-null   float64
 10  UTM_NORTHING         1878 non-null   float64
 11  SLOPE                0 non-null      float64
 12  ASPECT               0 non-null      float64
 13  ELEVATION            1384 non-null   float64
 14  SDEV_ELEVATION       1214

Unnamed: 0,SITE_ID,OP_GRID_ID,BOREAS_X,BOREAS_Y,LATITUDE,SDEV_LATITUDE,LONGITUDE,SDEV_LONGITUDE,UTM_ZONE,UTM_EASTING,...,GEO_INFO,VEG_INFO,LOCATION_DESCR,COORD_INFO_SOURCE,TERRAIN_INFO_SOURCE,SOIL_INFO_SOURCE,GEO_INFO_SOURCE,VEG_INFO_SOURCE,REVISION_DATE,COMMENTS
0,C3B7X,C3B7X,317.208,303.419,53.62903,,-106.19763,,13.0,420801.3,...,,,SSA OA,SAT. IMAGE,,,,,30-JAN-98,Category I
1,D0H4X,D0H4X,374.691,310.653,53.65498,,-105.32232,,13.0,478718.9,...,,,SSA YA,SAT. IMAGE,,,,,30-JAN-98,Category I
2,F0L9X,F0L9X,419.49,330.893,53.80121,,-104.61867,,13.0,525114.9,...,,,SSA Fen,SAT. IMAGE,,,,,30-JAN-98,Category I
3,F8L6X,F8L6X,416.907,338.981,53.87563,,-104.64655,,13.0,523237.4,...,,,SSA YJP,SAT. IMAGE,,,,,30-JAN-98,Category I
4,G2L3X,G2L3X,413.678,343.249,53.91641,,-104.68961,,13.0,520386.6,...,,,SSA OJP,SAT. IMAGE,,,,,30-JAN-98,Category I


### Prepping the site_area_geo_coord1 dataframe for plotting coordinates

In [25]:
# Rename SITE_ID to SITE_NAME (the column select_columns filters on).
# The result is rebound rather than renamed with inplace=True, so the cell
# reads as an explicit assignment and stays safe to re-run.
site_area_geo_coord1_data = site_area_geo_coord1_data.rename(columns={'SITE_ID': 'SITE_NAME'})

In [26]:
def select_columns(data_frame, column_names, site_pattern='NSA-OBS-|SSA-OBS-'):
    """Return the requested columns for rows whose SITE_NAME matches a pattern.

    Parameters
    ----------
    data_frame : pandas.DataFrame
        Source table; it is not modified.
    column_names : list
        Columns to keep. Must include ``'SITE_NAME'``, which is used for the
        row filter.
    site_pattern : str, optional
        Regular expression searched for anywhere in ``SITE_NAME``. Defaults to
        the original hard-coded pattern ``'NSA-OBS-|SSA-OBS-'``.

    Returns
    -------
    pandas.DataFrame
        The selected columns, restricted to matching rows, with the original
        index labels preserved.
    """
    new_frame = data_frame.loc[:, column_names]
    # na=False treats a missing SITE_NAME as "no match"; without it the mask
    # contains NaN and boolean indexing raises ValueError.
    is_target_site = new_frame['SITE_NAME'].str.contains(site_pattern, na=False)
    return new_frame[is_target_site]

In [29]:
# Keep only the identifier, coordinates and description for the NSA/SSA OBS sites.
site_columns = ['SITE_NAME', 'LATITUDE', 'LONGITUDE', 'LOCATION_DESCR']
new_frame = select_columns(site_area_geo_coord1_data, site_columns)

In [30]:
# Preview the first rows of the filtered site table.
new_frame.head()

Unnamed: 0,SITE_NAME,LATITUDE,LONGITUDE,LOCATION_DESCR
627,NSA-OBS-99OBS-TGB12-CON01,,,TGB-12 SOil Carbon measurement site.
628,NSA-OBS-9OBS1-TGB12-CON01,,,Old Black Spruce
629,NSA-OBS-9OBS2-TGB12-CON01,,,TGB12 measurement site near Old Black Spruce
630,NSA-OBS-FCA01-FORCN-1FCAN,55.88329,-98.47756,TE-OBS Plot 1
631,NSA-OBS-FCA01-FORCN-2FCAN,55.88316,-98.47574,TE-OBS Plot2


In [31]:
# Count missing values per column to see which fields need filling.
new_frame.isna().sum()

SITE_NAME         0
LATITUDE          7
LONGITUDE         7
LOCATION_DESCR    0
dtype: int64

### Filling the Null Values

In [32]:
# Values substituted for every missing LATITUDE / LONGITUDE entry.
# NOTE(review): the same pair is applied to all rows kept by the site filter,
# NSA and SSA alike - confirm that every row with missing coordinates belongs
# to the site these values describe.
fill_coords = {'LATITUDE': 55.88007, 'LONGITUDE': -98.4803}

# fillna returns a new frame, so new_frame itself keeps its nulls.
Boreal_Site_Coords = new_frame.fillna(value=fill_coords)
Boreal_Site_Coords

Unnamed: 0,SITE_NAME,LATITUDE,LONGITUDE,LOCATION_DESCR
627,NSA-OBS-99OBS-TGB12-CON01,55.88007,-98.48030,TGB-12 SOil Carbon measurement site.
628,NSA-OBS-9OBS1-TGB12-CON01,55.88007,-98.48030,Old Black Spruce
629,NSA-OBS-9OBS2-TGB12-CON01,55.88007,-98.48030,TGB12 measurement site near Old Black Spruce
630,NSA-OBS-FCA01-FORCN-1FCAN,55.88329,-98.47756,TE-OBS Plot 1
631,NSA-OBS-FCA01-FORCN-2FCAN,55.88316,-98.47574,TE-OBS Plot2
...,...,...,...,...
1873,SSA-OBS-FLXTR-TGB09-CON01,53.98717,-105.11779,TGB-09 NMHC measurement site near the SSA Old ...
1874,SSA-OBS-FLXTR-TGB09-FLX01,53.98717,-105.11779,TGB-09 non-methane hydrocarbon site near SSA O...
1875,SSA-OBS-FLXTR-TGB09-MIX01,53.98717,-105.11779,TGB-09 mixing ratio site near SSA Old Black Sp...
1876,SSA-OBS-FLXTR-TGB10-HCR01,53.98717,-105.11779,TGB-10 biogenic VOC measurement site near the ...


### Saving the file as .csv 

In [33]:
# Export the filled site table as tab-separated text (row index included).
# NOTE(review): the target has no file extension and is tab-delimited, while
# the notebook text describes a .csv for Tableau - confirm the intended
# file name and separator before changing either.
boreal_coords_path = 'data/Boreal_Site_Coords'
Boreal_Site_Coords.to_csv(boreal_coords_path, sep='\t')