# NASA SARP Comprehensive Python Cheatsheet (week 1)

It covers:
- Python basics: variables, conditionals, lists, dictionaries
- Tabular data using Pandas
- Scientific multidimensional data using Xarray
- Advanced data handling techniques
- NASA/Earth science-specific examples


## SECTION 1: Python Foundations

### 🔹 Variables and Data Types

In [12]:
name = "SARP"
age = 17
pm_value = 15.4
is_safe = pm_value < 35
print(f"Team: {name}, PM2.5: {pm_value}, Safe? {is_safe}")
#f is for decimals 

Team: SARP, PM2.5: 15.4, Safe? True


### 🔹 Type Conversion

In [13]:
value = "100"
value_int = int(value)
print(value_int + 20)

120


### 🔹 Conditional Statements

In [14]:
temp = 87
if temp > 90:
    print("Too Hot")
elif temp > 75:
    print("Warm")
else:
    print("Mild")

Warm


### 🔹 Lists and List Operations

In [15]:
sensors = ["NO2", "CO", "PM2.5"]
sensors.append("O3")
sensors.remove("CO")
print(sensors)

['NO2', 'PM2.5', 'O3']


### 🔹 Dictionaries and Access

In [16]:
reading = {
    "location": "Pasadena", 
    "key2": 42.7, 
    "unit": "µg/m³"
}

print(reading['key2']) #returns 42.7

42.7


## SECTION 2: Tabular Data Analysis with Pandas

In [17]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style='whitegrid')

### 🔹 Load Titanic Dataset

In [18]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style='whitegrid')

# Example 1: Here we are showing how to import data that is hosted somewhere else
# (aka. website such as githubusercontent)
url = 'https://raw.githubusercontent.com/mwaskom/seaborn-data/master/titanic.csv'
df1 = pd.read_csv(url)
df1.head()


# Example 2: Here we are showing how to import data that is downloaded locally on your machine
df2 = pd.read_csv("example1.csv")
df2.head()

Unnamed: 0,Name,Column1,Column2,Column3
0,Test,22,123,123
1,TEst2,2,12,123
2,Test3,3,421,123
3,Test4,4,34,34


### 🔹 Data Inspection & Summary

In [19]:
df.info()
df.describe()
df.isnull().sum()

NameError: name 'df' is not defined

### 🔹 Handling Missing Data

In [None]:
df['age'] = df['age'].fillna(df['age'].mean())
df.dropna(subset=['embarked'], inplace=True)

### 🔹 Remove Duplicates

In [None]:
df = df.drop_duplicates()

### 🔹 Convert Data Types

In [None]:
df['sex'] = df['sex'].astype('category')

### 🔹 Outlier Removal using IQR

In [None]:
Q1 = df['fare'].quantile(0.25)
Q3 = df['fare'].quantile(0.75)
IQR = Q3 - Q1
filtered = df[(df['fare'] >= Q1 - 1.5*IQR) & (df['fare'] <= Q3 + 1.5*IQR)]

### 🔹 Feature Engineering

In [None]:
df['family_size'] = df['sibsp'] + df['parch'] + 1
df['fare_norm'] = (df['fare'] - df['fare'].min()) / (df['fare'].max() - df['fare'].min())

### 🔹 Filtering Rows with Conditions

In [None]:
high_fare = df[df['fare'] > 100]
high_fare[['sex', 'fare']].head()

### 🔹 Grouping and Aggregation

In [None]:
df.groupby('sex')['survived'].mean()

### 🔹 Sorting Data

In [None]:
df.sort_values(by='age', ascending=False).head()

### 🔹 Pandas Visualizations

In [None]:
sns.histplot(df['age'], bins=20, kde=True)
plt.title('Age Distribution')
plt.show()

### 🔹 Save and Load

In [None]:
df.to_csv("cleaned_titanic.csv", index=False)
pd.read_csv("cleaned_titanic.csv")

## SECTION 3: Scientific Data Analysis with Xarray

In [None]:
import xarray as xr
xr.set_options(display_style='text')

In [None]:
url = 'http://test.opendap.org/opendap/data/nc/coads_climatology.nc'
ds = xr.open_dataset(url, decode_times=False)
ds

In [None]:
sst = ds['SST']
sst_clean = sst.where(sst > -1e30)

### 🔹 Subset by Time and Region

In [None]:
sst_jan = sst_clean.isel(TIME=0)
sst_region = sst_jan.sel(COADSY=slice(-10, 10), COADSX=slice(120, 240))

In [None]:
sst_region.plot(cmap='coolwarm')
plt.title("Sea Surface Temp - Tropical Pacific")
plt.show()

### 🔹 Compute Stats

In [None]:
sst_mean = sst_clean.mean(dim='TIME')
sst_mean.plot()
plt.title("Mean SST")
plt.show()

In [None]:
flat = sst_region.values.flatten()
flat = flat[~np.isnan(flat)]
plt.hist(flat, bins=30)
plt.title("Histogram of SST")
plt.show()

### 🔹 Export Subset

In [None]:
sst_region.to_dataset(name='sst_subset').to_netcdf("sst_subset.nc")