In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the per-year commit-count CSVs (2010-2021) and stack them into one frame.
#
# Frames are collected in a list and concatenated once at the end:
#   * DataFrame.append was removed in pandas 2.0, so pd.concat is required;
#   * concatenating once avoids re-copying the accumulated frame every year.
yearly_frames = []

for year in range(2010, 2022):
    dataset_filename = f'dataset/commit_data_{year}.csv'

    try:
        yearly_frames.append(pd.read_csv(dataset_filename))
    except FileNotFoundError:
        # Skip the missing year entirely. Nothing is appended for it, so a
        # previously loaded year can never be duplicated in its place.
        # Any other failure (e.g. a malformed CSV) is left to propagate.
        print(f"{dataset_filename} does not exist.")

# pd.concat raises on an empty list, so fall back to an empty frame when no
# file at all could be read.
df = pd.concat(yearly_frames, ignore_index=True) if yearly_frames else pd.DataFrame()

df

Unnamed: 0,date,commit_count
0,2010-01-01,402487
1,2010-01-02,646891
2,2010-01-03,282974
3,2010-01-04,2391253
4,2010-01-05,1959631
...,...,...
4378,2021-12-27,1347898
4379,2021-12-28,1462890
4380,2021-12-29,1452643
4381,2021-12-30,1394772


In [2]:
# Convert the 'date' column from its CSV string form into datetime64 values
# so it can be used as a time axis; then preview the first rows.
df["date"] = df["date"].pipe(pd.to_datetime)
df.head(5)

Unnamed: 0,date,commit_count
0,2010-01-01,402487
1,2010-01-02,646891
2,2010-01-03,282974
3,2010-01-04,2391253
4,2010-01-05,1959631


In [3]:
# Three parallel lists describing the vertical marker lines on the plot:
# entry i of each list belongs to the same country/group.

# Date of the first Covid lockdown for each country/group, as Timestamps.
first_lockdown_dates = [
    pd.to_datetime(day)
    for day in (
        '2020-01-23',  # China
        '2020-03-17',  # Canada and France
        '2020-03-19',  # USA
        '2020-03-22',  # Germany
        '2020-03-23',  # Australia and UK
        '2020-03-24',  # India
        '2020-03-29',  # Russia
        '2020-05-05',  # Brazil
    )
]

# Line colour per marker.
# Alternative palette: 'red', 'darkgoldenrod', 'y', 'olive', 'forestgreen', 'teal', 'dodgerblue', 'magenta'
vline_colors = [
    'red',
    'lightpink',
    'lime',
    'gold',
    'darkgreen',
    'aqua',
    'fuchsia',
    'blue',
]

# Legend text per marker.
vline_labels = [
    'China',
    'Canada and France',
    'USA',
    'Germany',
    'Australia and UK',
    'India',
    'Russia',
    'Brazil',
]

In [4]:
import matplotlib.dates as dates
%matplotlib qt

# Daily commit counts across the whole period, drawn as the base series.
fig, ax = plt.subplots()
ax.plot(df['date'], df['commit_count'], color='gray', label="Commit Count")
fig.autofmt_xdate()
ax.set_title(
    'Date vs Commit Count before and after the Initial Covid Lockdown of Top 10 GitHub Using Nations',
    size=18,
)
ax.set_xlabel('Date', size=15)
ax.set_ylabel('Commit Count', size=15)

# One dotted vertical marker per lockdown date; colour and legend label are
# taken from the parallel lists at the same position.
for idx, lockdown_date in enumerate(first_lockdown_dates):
    ax.axvline(
        x=lockdown_date,
        color=vline_colors[idx],
        linestyle='dotted',
        linewidth=2,
        label=f'First Covid Lockdown - {vline_labels[idx]}',
    )

# Linear trend line: dates are converted to matplotlib's numeric date values
# so a degree-1 polynomial can be fitted against the commit counts.
x_dates = df['date']
x_num = dates.date2num(x_dates)
trend = np.polyfit(x_num, df['commit_count'], 1)
fit = np.poly1d(trend)

# Evaluate the fitted line on an evenly spaced grid (np.linspace default of
# 50 points) spanning the data, then map the grid back to datetimes to plot.
x_fit = np.linspace(x_num.min(), x_num.max())
ax.plot(dates.num2date(x_fit), fit(x_fit), color='teal', linestyle='dashed', label='Trendline')
ax.legend(loc='best')
plt.show()