In [None]:
# creating a time series data frame

In [None]:
import pandas as pd

# Read the visibility workbook; the first column is used as the index and
# pandas is asked to parse it as dates.
df = pd.read_excel(
    '1614069210_visibility.xlsx',
    index_col=0,
    parse_dates=True,
)
print(df)

In [None]:
df.shape

In [None]:
df.head()

In [None]:
df.tail()

In [None]:
# The data sheet records the fog visibility index every half hour from Jan 1 2016 to Dec 31 2020

In [None]:
df.dtypes

In [None]:
df

In [None]:
# Derive calendar columns (year, month, day of month, time of day) from the datetime index.
stamps = df.index
df['Year'], df['Month'] = stamps.year, stamps.month
df['Day'], df['Time'] = stamps.day, stamps.time
df

In [None]:
df.index.time

In [None]:
# time based indexing

In [None]:
df.loc['2016-01'].head()

In [None]:
### Data Visualization

In [None]:
import matplotlib.pyplot as plt

In [None]:
import seaborn as sns

# Apply seaborn's theme and make 8x4 the default figure size for later plots.
sns.set(rc={'figure.figsize': (8, 4)})

In [None]:
df['visibility_km'].plot(linewidth=2)

In [None]:
import numpy as np

In [None]:
# Line plot of every visibility reading against the calendar year it belongs to.
# Only columns of `df` are plotted, so no synthetic data needs to be generated here.
fig, ax = plt.subplots()
ax.plot(df['Year'], df['visibility_km'])

ax.set(xlabel='years', ylabel='visibility_kms',
       title='Plotting the fog index based on visibility kms per year')
ax.grid()

plt.show()

In [None]:
# Draw the visibility readings as unconnected, semi-transparent dots,
# one subplot per column listed in `cols_plot`.
cols_plot = ['visibility_km']
axes = df[cols_plot].plot(
    subplots=True,
    figsize=(20, 15),
    linestyle='None',
    marker='.',
    alpha=0.5,
)
for ax in axes:
    ax.set_ylabel('visibility index')

In [None]:
df['visibility_km'].max()

In [None]:
# Print the year of every reading whose visibility equals the overall maximum.
# The maximum is computed once and matching rows are selected with a boolean mask;
# this avoids integer `series[i]` lookups on a datetime-indexed Series, which
# pandas is phasing out as a positional access.
peak_visibility = df['visibility_km'].max()
for peak_year in df.loc[df['visibility_km'] == peak_visibility, 'Year']:
    print(peak_year)

In [None]:
df['visibility_km'].loc['2016-09'].max()

In [None]:
df.index

In [None]:
# Regression plot with visibility on the x axis and year on the y axis.
sns.regplot(data=df, x='visibility_km', y='Year')
plt.show()

In [None]:
sns.jointplot(x=df['visibility_km'], y=df['Year'], kind="kde")

In [None]:
# Scatter of visibility (y axis) against year (x axis).
# The axis labels follow the argument order of plt.scatter(x, y).
plt.scatter(df['Year'], df['visibility_km'])
plt.xlabel('Year')
plt.ylabel('visibility_index')
plt.show()

In [None]:
df.plot(marker='.', alpha=0.5, linestyle='None', figsize=(10, 12), subplots=True)

In [None]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
sns.set_theme(style="dark")

# Layer three views of visibility per year on one square axes:
# the raw points, a 2-D histogram, and density contours.
f, ax = plt.subplots(figsize=(6, 6))
year_values = df['Year']
visibility_values = df['visibility_km']
sns.scatterplot(x=year_values, y=visibility_values, s=15, color=".25", ax=ax)
sns.histplot(x=year_values, y=visibility_values, bins=150, pthresh=.1, cmap="mako", ax=ax)
sns.kdeplot(x=year_values, y=visibility_values, levels=15, color="w", linewidths=1, ax=ax)

In [None]:
df['visibility_km'].head()

In [None]:
# Estimate the mean and standard deviation of fog for each month, and show the result using a graph.

In [None]:
df.head()

In [None]:
np.mean(df['visibility_km'].loc['2016-01'])

In [None]:
import statistics
statistics.mean(df['visibility_km'].loc['2016-1'])

In [None]:
df['Year'].unique()

In [None]:
df['Month'].unique()

In [None]:
# Shared inputs for the per-month statistics computed in the following cells.
years = df['Year'].unique()
months = df['Month'].unique()

fog_records = df['visibility_km']
monthly_analysis = {}

In [None]:
# Mean visibility for every (year, month) combination, year-major order.
# The nested iteration is deliberate: handing both arrays to map() would pair
# them element by element and stop after len(years) items, yielding means for
# mismatched pairs such as (years[1], months[1]) only.
result = [
    statistics.mean(fog_records.loc[f'{year}-{month}'])
    for year in years
    for month in months
]
list(result)

In [None]:
# One figure per year: mean visibility of each calendar month.
# `mean_s` keeps the values of the last year plotted once the loop finishes.
for year in years:
    mean_s = [np.mean(fog_records.loc[f'{year}-{month}']) for month in months]

    fig, ax = plt.subplots()
    sns.scatterplot(x=months, y=mean_s, s=15, color=".15", ax=ax)
    ax.set(xlabel='month (s)', ylabel='visibility_kms (mean)', title=f'{year}')
    plt.show()

In [None]:
mean_s

In [None]:
f'{years[0]}-{months[0]}'

In [None]:
from itertools import combinations

In [None]:
# Print the mean visibility for every year/month pair.
# Plain nested loops are used because combinations(range(len(years)), 2) only
# yields index pairs i < j < len(years): most months would never be visited and
# the first month would be skipped entirely.
for year in years:
    for month in months:
        print(year, month, statistics.mean(fog_records.loc[f'{year}-{month}']))

In [None]:
# One figure per year: standard deviation (np.std) of visibility in each calendar month.
# Despite its name, `mean_si` holds the standard deviations of the year being plotted.
for year in years:
    mean_si = [np.std(fog_records.loc[f'{year}-{month}']) for month in months]

    fig, ax = plt.subplots()
    sns.scatterplot(x=months, y=mean_si, s=15, color=".25", ax=ax)
    ax.set(xlabel='month (s)', ylabel='visibility_kms (std)', title=f'{year}')
    plt.show()

In [None]:
# A fog event begins when the visibility falls below 1 km and ends when it exceeds 1 km. For the given data:
# (a) identify the number of fog-events
# (b) calculate the duration of each fog-event.

In [None]:
df['visibility_km'].loc['2016-1'].head()

In [None]:
df['visibility_km'].loc['2016-1'].tail()

In [None]:
df.loc['2016-01']

In [None]:
# Store the hour of day (0-23) of each reading in 'Duration'.
# NOTE: despite the column name this is the clock hour of the reading, not an
# elapsed time. It is read straight from the datetime index (the same index the
# 'Time' column is derived from) instead of re-parsing `datetime.time` objects
# through pd.to_datetime, which depends on how pandas coerces non-string values.
df['Duration'] = df.index.hour
df

In [None]:
def find_fog_events(visibility, threshold_km=1):
    """Return one row per fog event found in a time-indexed visibility series.

    An event starts at the first reading below ``threshold_km`` and lasts until
    the first later reading above it. Readings exactly at the threshold, and
    missing readings, leave the current state unchanged.

    Parameters
    ----------
    visibility : pandas.Series
        Visibility readings indexed by timestamp.
    threshold_km : float, default 1
        Visibility level that separates fog from clear conditions.

    Returns
    -------
    pandas.DataFrame
        Columns ``start`` (first fog reading), ``end`` (first clear reading
        after the event) and ``duration_hours`` (``end - start`` in hours).
        An event still open at the last reading is closed at that reading.
    """
    ordered = visibility.sort_index()
    times = pd.Series(ordered.index)
    values = ordered.reset_index(drop=True)

    # 1.0 = fog begins/continues, 0.0 = fog has cleared, NaN = no change.
    # Forward-filling carries the most recent state across "no change" readings.
    transition = pd.Series(np.nan, index=values.index)
    transition[values < threshold_km] = 1.0
    transition[values > threshold_km] = 0.0
    in_fog = transition.ffill().fillna(0.0).astype(bool)

    # A new event starts wherever a fog reading follows a non-fog reading;
    # the running count of starts gives every fog reading its event number.
    starts_event = in_fog & ~in_fog.shift(1, fill_value=False)
    samples = pd.DataFrame({
        'event': starts_event.cumsum(),
        'time': times,
        'next_time': times.shift(-1),
    })[in_fog]

    per_event = samples.groupby('event')
    events = pd.DataFrame({
        'start': per_event['time'].min(),
        'end': per_event['next_time'].max(),
    })
    # A run that reaches the end of the data has no clearing reading after it.
    events['end'] = events['end'].fillna(per_event['time'].max())
    events['duration_hours'] = (events['end'] - events['start']) / pd.Timedelta(hours=1)
    return events.reset_index(drop=True)


# (a) number of fog events, (b) duration of each event.
fog_events = find_fog_events(fog_records)
fog_event_counter = len(fog_events)

# Per-year groups consumed by the Q2/Q3 cells:
#   duration[k] - Series of event durations in hours for one year
#   timer[k]    - Series of event start times of day (datetime.time) for one year
duration = []
timer = []
for event_year, yearly_events in fog_events.groupby(fog_events['start'].dt.year):
    duration.append(yearly_events['duration_hours'])
    timer.append(yearly_events['start'].dt.time)

print(f'{fog_event_counter} fog events found')
fog_events.head()

In [None]:
# Five years of visibility data for a site is attached. Analyze the data to answer the following questions. Plot graphs or tables to support your answer.
# Q1. Which months are likely to have fog?
# Q2. What is the usual time in a day for a fog event to start?
# Q3. What is the usual (average) duration of a fog event?
# Q4. Has the number of fog events at the site increased or decreased during the last five years?

### Q1:
- Based on the graphs of the monthly mean visibility for the years 2016 - 2020, we infer that the following months are likely to have fog:
    * 2016: June, August and September
    * 2017: April, May, July, August and September
    * 2018: April, May, August, September
    * 2019: May, June, August, September
    * 2020: April, May, June, July
    
- `April`, `May`, `June`, `August` and `September` have a high likelihood of fog occurrence

In [None]:
### Q2:
import datetime

# Tally the hour of day of every time stored in `timer` (a list of Series of
# datetime.time values) and report the most frequent hour(s).
start_hours = pd.Series(
    [moment.hour for times in timer for moment in times],
    dtype='int64',
)
usual_start_hour = start_hours.mode()

# Kept for backward compatibility: the latest time of day in the last group.
for i in range(len(timer)):
    max_timer = timer[i].max()

usual_start_hour

### The usual start time of a fog event is approximately `11am`


In [None]:
### Q3:
### Based on the mean of the fog event data we have:
# Average, over the groups collected in `duration`, of each group's mean value.
# np.mean is used instead of statistics.mean because the latter can return a
# truncated integer when given NumPy integer data, and the running total no
# longer shadows the builtin `sum`.
group_means = [float(np.mean(group)) for group in duration]
# With no groups there is nothing to average; report NaN instead of dividing by zero.
fog_event_duration = float(np.mean(group_means)) if group_means else float('nan')
fog_event_duration

### The average duration of a fog event is approximately `11 minutes and 30 seconds`

### Q4:
Based on the trends in the graphs, the number of fog events appears to have decreased significantly, probably because of increasing temperatures.