## Reading Data using pandas

In [None]:
import pandas as pd

# Relative path of the CSV file to load.
url = "Dataset/imports-85.csv"

# Parse the CSV into a DataFrame, then preview its first five rows.
dt = pd.read_csv(filepath_or_buffer=url)
dt.head(n=5)

## Data Selection

In [None]:
import numpy as np
import pandas as pd

In [None]:
# Relative path of the RadNet laboratory-analysis CSV (data source: https://www.epa.gov/).
url = "Dataset/RadNet_Laboratory_Analysis.csv"

# Parse the CSV into a DataFrame, then preview its first five rows.
dt = pd.read_csv(filepath_or_buffer=url)
dt.head(n=5)

In [None]:
# List the distinct values found in the 'State' column.
dt['State'].unique()

In [None]:
# Boolean-mask filtering: keep only the rows whose State equals "MN".
dt[dt.State == "MN"]

In [None]:
# Build one boolean mask per condition, then keep the rows matching both.
in_california = dt.State == 'CA'
is_drinking_water = dt['Sample Type'] == 'Drinking Water'
dt[in_california & is_drinking_water]

In [None]:
# Two-step selection: .loc first filters the rows where State is "MN",
# then ["I-131"] picks that column out of the filtered frame.
dt.loc[(dt.State == "MN")]["I-131"]

In [None]:
# Single .loc call with a row mask and a column label: the "I-131" values
# of the rows where State is "MN".
dt.loc[dt.State == "MN", "I-131"]

In [None]:
# A list of labels (double brackets) selects a one-column DataFrame rather
# than a Series; show the first five rows of 'I-132'.
dt[['I-132']].head()

## Exploring Data Type

In [None]:
import numpy as np
import pandas as pd

In [None]:
# Relative path of the RadNet laboratory-analysis CSV (data source: https://www.epa.gov/).
url = "Dataset/RadNet_Laboratory_Analysis.csv"

# Parse the CSV into a fresh DataFrame.
dt = pd.read_csv(filepath_or_buffer=url)

In [None]:
# Show the dtype pandas assigned to each column when reading the CSV.
dt.dtypes

In [None]:
# Parse 'Date Posted' into pandas datetimes, overwriting the original column.
dt['Date Posted'] = pd.to_datetime(dt['Date Posted'])

In [None]:
# Parse 'Date Collected' into pandas datetimes, overwriting the original column.
dt['Date Collected'] = pd.to_datetime(dt['Date Collected'])

In [None]:
# Columns that are excluded from the list built below.
id_columns = ['State', 'Location', 'Date Posted', 'Date Collected', 'Sample Type', 'Unit']

# Every remaining column of the frame. A list comprehension keeps the
# DataFrame's own column order; the previous set difference produced an
# arbitrary order that could change from one run to the next.
columns = [name for name in dt.columns if name not in id_columns]
columns

In [None]:
# Turn the textual markers 'Non-detect' and 'ND' into NaN in every column
# listed in `columns`.
# A single DataFrame.replace call handles both spellings at once. It makes the
# former dedicated 'Cs-134' pass unnecessary (that column is part of
# `columns`) and avoids DataFrame.applymap, deprecated since pandas 2.1.
dt[columns] = dt[columns].replace(['Non-detect', 'ND'], np.nan)

In [None]:
# Convert every column named in `columns` to a numeric dtype, column by column.
# pd.to_numeric uses its default error handling, so a value that cannot be
# parsed as a number raises.
dt[columns] = dt[columns].apply(pd.to_numeric)

In [None]:
# Show each column's dtype again, after the pd.to_numeric conversion.
dt.dtypes

In [None]:
# Re-type the four label columns as pandas categoricals, one after another.
for label in ('State', 'Location', 'Unit', 'Sample Type'):
    dt[label] = dt[label].astype('category')

In [None]:
# Show each column's dtype once more, after the categorical conversion.
dt.dtypes

## Aggregation and Grouping

In [None]:
import numpy as np
import pandas as pd

In [None]:
# Relative path of the RadNet laboratory-analysis CSV (data source: https://www.epa.gov/).
url = "Dataset/RadNet_Laboratory_Analysis.csv"

# Parse the CSV into a fresh DataFrame.
dt = pd.read_csv(filepath_or_buffer=url)

In [None]:
# Group the rows by 'State'. No aggregation is requested here, so the cell
# only displays the resulting GroupBy object.
dt.groupby('State')

In [None]:
# Take the 'Cs-134' column of each State group and show the first rows of
# every group (GroupBy.head defaults to five rows per group).
dt.groupby('State')['Cs-134'].head()

In [None]:
# Per-state mean of the numeric columns, limited to the first 20 groups.
# numeric_only=True is needed on pandas >= 2.0, where mean() raises a
# TypeError on text columns instead of silently leaving them out; the frame
# is freshly read from the CSV here, so such columns are present.
dt.groupby('State').mean(numeric_only=True).head(20)

In [None]:
# 'Cs-134' and 'Te-129' come straight from the CSV at this point and may
# still hold text markers such as "Non-detect", which break mean/std and
# would turn min/max into string comparisons. Coerce both columns to numbers
# first (anything unparseable becomes NaN) without modifying `dt` itself.
measurements = dt[['State', 'Location']].join(
    dt[['Cs-134', 'Te-129']].apply(pd.to_numeric, errors='coerce')
)

# Mean/std of Cs-134 and min/max of Te-129 for each (State, Location) pair,
# limited to the first 20 groups.
measurements.groupby(['State', 'Location']).agg({
    'Cs-134': ['mean', 'std'],
    'Te-129': ['min', 'max'],
}).head(20)

## Exporting Data using pandas to Different Formats

In [None]:
import numpy as np
import pandas as pd

In [None]:
# Relative path of the RadNet laboratory-analysis CSV (data source: https://www.epa.gov/).
url = "Dataset/RadNet_Laboratory_Analysis.csv"

# Parse the CSV into a fresh DataFrame.
dt = pd.read_csv(filepath_or_buffer=url)

In [None]:
# Columns that are excluded from the list built below.
id_columns = ['State', 'Location', 'Date Posted', 'Date Collected', 'Sample Type', 'Unit']

# Every remaining column of the frame. A list comprehension keeps the
# DataFrame's own column order; the previous set difference produced an
# arbitrary order that could change from one run to the next.
columns = [name for name in dt.columns if name not in id_columns]
columns

In [None]:
# Turn the textual markers 'Non-detect' and 'ND' into NaN in every column
# listed in `columns`.
# A single DataFrame.replace call handles both spellings at once. It makes the
# former dedicated 'Cs-134' pass unnecessary (that column is part of
# `columns`) and avoids DataFrame.applymap, deprecated since pandas 2.1.
dt[columns] = dt[columns].replace(['Non-detect', 'ND'], np.nan)

In [None]:
# Trim leading/trailing whitespace from the four text label columns.
# The vectorised .str.strip() leaves missing values (NaN) untouched, whereas
# the previous element-wise x.strip() raised AttributeError on them; it also
# avoids DataFrame.applymap, deprecated since pandas 2.1.
text_columns = ['State', 'Location', 'Sample Type', 'Unit']
dt[text_columns] = dt[text_columns].apply(lambda labels: labels.str.strip())

In [None]:
# Parse both date columns into pandas datetimes, overwriting each in turn.
for date_column in ('Date Posted', 'Date Collected'):
    dt[date_column] = pd.to_datetime(dt[date_column])

In [None]:
# Convert every column named in `columns` to a numeric dtype, column by column.
# pd.to_numeric uses its default error handling, so a value that cannot be
# parsed as a number raises.
dt[columns] = dt[columns].apply(pd.to_numeric)

In [None]:
# Re-type the four label columns as pandas categoricals, one after another.
for label in ('State', 'Location', 'Sample Type', 'Unit'):
    dt[label] = dt[label].astype('category')

In [None]:
# Write the frame to a semicolon-separated, UTF-8 encoded CSV file in the
# working directory, leaving out the row index.
export_options = {'index': False, 'sep': ';', 'encoding': 'utf-8'}
dt.to_csv('RadiationDataset_clean.csv', **export_options)