# Reading AERONET data with pandas

## Import python packages

In [None]:
import pandas as pd
import s3fs
import xarray as xr

## Connect to bucket (anonymous login for public data only)

In [None]:
# Anonymous S3 client that talks to the custom endpoint given below
endpoint_options = {"endpoint_url": "https://climate.uiogeo-apps.sigma2.no/"}
fs = s3fs.S3FileSystem(anon=True, client_kwargs=endpoint_options)

# Collect the paths of all objects matching the wildcard pattern
s3path = "ESGF/obs4MIPs/AERONET/AeronetSunV3Lev1.5.daily/*.lev30"
remote_files = fs.glob(s3path)

## Access data files

In [None]:
# Open each listed path; fileset[i] is the opened counterpart of remote_files[i]
fileset = list(map(fs.open, remote_files))

## Station selection

In [None]:
# Positions of every path containing the station name; the first hit is used
station_hits = [pos for pos, path in enumerate(remote_files) if "Lille" in path]
matching_index = station_hits[0]
matching_index

## Data reading with pandas

In [None]:
# Read the selected station file into a DataFrame.
# skiprows=6: the first rows do not contain as many columns as the data rows,
#   so they have to be skipped before parsing.
# na_values=-999: AERONET uses -999 as the fill value for missing
#   measurements; converting it to NaN keeps the fill value out of any
#   statistics computed later (e.g. resampled means).
df = pd.read_csv(fileset[matching_index], skiprows=6, na_values=-999)
df.head()

In [None]:
# Show the column labels available in the table
df.columns

## Create proper Date index

In [None]:
# Parse the "dd:mm:yyyy" strings of the Date(dd:mm:yyyy) column into timestamps
timestamps = pd.to_datetime(df["Date(dd:mm:yyyy)"], format="%d:%m:%Y")

# Use them, under the name "Date", as the row index of the existing frame
df.index = pd.DatetimeIndex(timestamps, name="Date")

## Plot time series using pandas

In [None]:
import matplotlib.pyplot as plt

# Create the axes explicitly so both curves are drawn on the same, known axes
# rather than on matplotlib's implicit "current axes".
fig, ax = plt.subplots(figsize=(18, 6))

# Values at the file's native time resolution
df["AOD_500nm"].plot(ax=ax, ylim=(0, 1), label="daily")

# Monthly means drawn on top with a thicker line.
# The x-limits are given as ISO dates (YYYY-MM-DD) so they parse unambiguously.
# NOTE: pandas >= 2.2 deprecates the "M" alias in favour of "ME"; "1M" is kept
# here so the cell also runs on older pandas versions.
df["AOD_500nm"].resample("1M").mean().plot(
    ax=ax,
    ylim=(0, 1),
    xlim=("2005-01-01", "2021-12-31"),
    lw=3,
    label="monthly mean",
)

ax.set_title("Lille", weight="bold")
ax.set_ylabel("AOD@500nm")
# Tell the two curves apart
ax.legend()