# Imports



In [None]:
import pandas as pd
from datetime import datetime, timedelta
import calendar



## Functions

In [None]:
def generate_weekly_commits_calendar(start_date, end_date):
  """Build a zero-initialised per-day commit counter for a date range.

  Args:
    start_date: First day of the range (inclusive). Must support
      subtraction, ``+ timedelta`` and ``strftime`` (e.g. ``datetime``).
    end_date: Last day of the range (inclusive).

  Returns:
    dict: Maps every day from ``start_date`` to ``end_date``, formatted as
    ``YYYY-MM-DD``, to ``0``. Keys are in chronological order. The dict is
    empty when ``start_date`` is after ``end_date``.
  """
  # Number of whole days separating the two bounds; negative when the
  # range is empty, which makes the range() below empty as well.
  whole_days = (end_date - start_date).days
  return {
      (start_date + timedelta(days=offset)).strftime("%Y-%m-%d"): 0
      for offset in range(whole_days + 1)
  }

In [None]:
def add_weekly_commits(start_date, end_date, ignored_author='daltonserey'):
  """Count commits per day across all repositories in the global ``df``.

  Reads the notebook-level DataFrame ``df``; each value of its ``commits``
  column is iterated as a collection of commit dicts that provide the keys
  ``'author-login'`` and ``'date'``.

  Args:
    start_date: First day to count (inclusive).
    end_date: Last day to count (inclusive).
    ignored_author: Login whose commits are excluded from the count.
      Defaults to ``'daltonserey'``, the value that was previously
      hard-coded.

  Returns:
    dict: ``YYYY-MM-DD`` string -> number of counted commits on that day,
    with an entry (possibly ``0``) for every day in the range.
  """
  weekly_commits = generate_weekly_commits_calendar(start_date, end_date)

  for repo_commits in df['commits']:
      for commit in repo_commits:
          if commit['author-login'] == ignored_author:
              continue
          # Keep only the part before 'T' (assumes an ISO-8601-like
          # 'YYYY-MM-DDT...' timestamp string -- TODO confirm with the data source).
          day = commit['date'].split('T')[0]
          # A commit dated outside the requested range has no slot in the
          # calendar; skip it instead of failing with KeyError.
          if day in weekly_commits:
              weekly_commits[day] += 1

  return weekly_commits

In [None]:
def create_df_weekly(start_date, end_date):
  """Return the per-day commit counts for a date range as a DataFrame.

  Args:
    start_date: First day of the range (inclusive).
    end_date: Last day of the range (inclusive).

  Returns:
    pandas.DataFrame: One row per day, indexed by the day converted with
    ``pd.to_datetime`` and sorted ascending, with a single ``commits``
    column holding that day's count from ``add_weekly_commits``.
  """
  counts_by_day = add_weekly_commits(start_date, end_date)
  daily_frame = pd.DataFrame.from_dict(
      counts_by_day, orient='index', columns=['commits']
  )
  # The dict keys are 'YYYY-MM-DD' strings; turn them into real timestamps
  # so callers can compare and sort the index chronologically.
  daily_frame.index = pd.to_datetime(daily_frame.index)
  return daily_frame.sort_index()

In [None]:
def get_weekly_days(start_date, end_date):
  """List every Sunday between two dates as ``YYYY-MM-DD`` strings.

  Args:
    start_date: Start of the range (inclusive).
    end_date: End of the range (inclusive).

  Returns:
    list[str]: The Sundays reachable from ``start_date`` in whole-day steps
    that are not after ``end_date``, in chronological order. Empty when no
    Sunday falls in the range.
  """
  # weekday() numbers Monday as 0 and Sunday as 6, so this is the number of
  # days to move forward to land on the first Sunday (0 if already there).
  days_to_first_sunday = (6 - start_date.weekday()) % 7
  sunday = start_date + timedelta(days=days_to_first_sunday)

  sundays = []
  while sunday <= end_date:
      sundays.append(sunday.strftime('%Y-%m-%d'))
      sunday += timedelta(weeks=1)
  return sundays


In [None]:
def get_commits_per_week(start_date, end_date, daily_commits=None):
  """Sum daily commit counts into Sunday-started weekly buckets.

  Each row is assigned to the week of its own date rather than by walking
  a week pointer alongside the rows. Rows that start before the first
  week, rows after a multi-week gap, and rows that fall in the last week
  are therefore all bucketed correctly, regardless of row order.

  Args:
    start_date: Start of the range used to pick the week-start Sundays
      (inclusive).
    end_date: End of that range (inclusive).
    daily_commits: DataFrame indexed by timestamps with a ``commits``
      column. When omitted, the notebook-level ``march_to_april`` frame is
      used, which is what this function always read before.

  Returns:
    dict: ``YYYY-MM-DD`` of each Sunday in the range -> total commits of
    the rows whose week starts on that Sunday. Rows whose week start is
    not one of those Sundays are ignored.
  """
  if daily_commits is None:
    # Backward-compatible default: the global frame built earlier in the notebook.
    daily_commits = march_to_april

  commits_per_week = {week: 0 for week in get_weekly_days(start_date, end_date)}

  for day, commits in daily_commits["commits"].items():
    # weekday() is Monday=0 ... Sunday=6, so (weekday + 1) % 7 is the number
    # of days elapsed since the most recent Sunday (0 on a Sunday).
    week_start = day - timedelta(days=(day.weekday() + 1) % 7)
    week_key = week_start.strftime('%Y-%m-%d')
    if week_key in commits_per_week:
      commits_per_week[week_key] += commits

  return commits_per_week


In [None]:
def get_commits_per_month(df):
  """Total the ``commits`` column per calendar month name.

  Args:
    df: DataFrame whose index entries expose a ``.month`` attribute (e.g. a
      DatetimeIndex) and which has a ``commits`` column.

  Returns:
    dict: English month name (``calendar.month_name``) -> sum of ``commits``
    for the rows in that month, in order of first appearance. Rows from the
    same month of different years share one entry, because only the month
    name is used as the key.
  """
  totals = {}
  for timestamp, row in df.iterrows():
    month_label = calendar.month_name[timestamp.month]
    commits = row["commits"]
    if month_label in totals:
      totals[month_label] += commits
      continue
    # First row seen for this month starts its running total.
    totals[month_label] = commits
  return totals

### Loading Data

In [None]:
# Load the dataset (JSON) from a Google Docs download link into `df`.
# The `commits` column of this frame is what add_weekly_commits iterates over.
df = pd.read_json('https://docs.google.com/uc?export=download&id=1Pjp6CQTEQvoJFmQYrgzMQugPYvlVR3sQ')
df

Unnamed: 0,id,name,visibility,commits,commits-weekly-activity
0,620071292,andrielly.lucena,private,[{'sha': '83266b157220d86beff8d372cb6f3e612787...,"[{'author': {'login': 'daltonserey', 'id': 801..."
1,620070811,abraao.araujo,private,[{'sha': '0f44c351cb8b50f871daba051a973ae70e98...,"[{'author': {'login': 'daltonserey', 'id': 801..."
2,620071303,bianca.pacheco,private,[{'sha': 'd510f37a9b5ba1995ce41ae4fced75136945...,"[{'author': {'login': 'daltonserey', 'id': 801..."
3,620071349,guilherme.peixoto,private,[{'sha': '47290bca73faa7501ad9c8d851eb3e795833...,"[{'author': {'login': 'daltonserey', 'id': 801..."
4,620071305,carlos.ribeiro,private,[{'sha': 'dc0dca4195e76f11303f316cb72d701f6eb4...,"[{'author': {'login': 'daltonserey', 'id': 801..."
5,620071324,carmem.neri,private,[{'sha': '7e27f1bf683c168f8b8a5de72d25553fc545...,"[{'author': {'login': 'daltonserey', 'id': 801..."
6,620071338,cilas.marques,private,[{'sha': '90b351ac8c45733300ecbcc0bf766d94788a...,"[{'author': {'login': 'cilasmarques', 'id': 38..."
7,620071315,carmelita.medeiros,private,[{'sha': 'db76bc8079b5f1e2842dc1dbef50899744f9...,"[{'author': {'login': 'daltonserey', 'id': 801..."
8,620071366,joao.pedro.santos,private,[{'sha': 'fe36ac63d01d5d47cb7e9c157ca2d204e801...,"[{'author': {'login': 'daltonserey', 'id': 801..."
9,620071375,joao.victor.lucena,private,[{'sha': 'e032572cb960664e9de1c42da24986fe0aef...,"[{'author': {'login': 'daltonserey', 'id': 801..."


In [None]:
# Drop the rows named 'YARA-Onboard' and 'Github-Metric-Collector'
# (presumably not student repositories -- confirm with the data owner).
df = df[~df['name'].isin(['YARA-Onboard', 'Github-Metric-Collector'])]
df

Unnamed: 0,id,name,visibility,commits,commits-weekly-activity
0,620071292,andrielly.lucena,private,[{'sha': '83266b157220d86beff8d372cb6f3e612787...,"[{'author': {'login': 'daltonserey', 'id': 801..."
1,620070811,abraao.araujo,private,[{'sha': '0f44c351cb8b50f871daba051a973ae70e98...,"[{'author': {'login': 'daltonserey', 'id': 801..."
2,620071303,bianca.pacheco,private,[{'sha': 'd510f37a9b5ba1995ce41ae4fced75136945...,"[{'author': {'login': 'daltonserey', 'id': 801..."
3,620071349,guilherme.peixoto,private,[{'sha': '47290bca73faa7501ad9c8d851eb3e795833...,"[{'author': {'login': 'daltonserey', 'id': 801..."
4,620071305,carlos.ribeiro,private,[{'sha': 'dc0dca4195e76f11303f316cb72d701f6eb4...,"[{'author': {'login': 'daltonserey', 'id': 801..."
5,620071324,carmem.neri,private,[{'sha': '7e27f1bf683c168f8b8a5de72d25553fc545...,"[{'author': {'login': 'daltonserey', 'id': 801..."
6,620071338,cilas.marques,private,[{'sha': '90b351ac8c45733300ecbcc0bf766d94788a...,"[{'author': {'login': 'cilasmarques', 'id': 38..."
7,620071315,carmelita.medeiros,private,[{'sha': 'db76bc8079b5f1e2842dc1dbef50899744f9...,"[{'author': {'login': 'daltonserey', 'id': 801..."
8,620071366,joao.pedro.santos,private,[{'sha': 'fe36ac63d01d5d47cb7e9c157ca2d204e801...,"[{'author': {'login': 'daltonserey', 'id': 801..."
9,620071375,joao.victor.lucena,private,[{'sha': 'e032572cb960664e9de1c42da24986fe0aef...,"[{'author': {'login': 'daltonserey', 'id': 801..."


In [None]:
# Per-day commit counts for 2023-03-28 .. 2023-04-24. get_commits_per_week
# reads this frame by its global name.
march_to_april = create_df_weekly(datetime(2023, 3, 28), datetime(2023, 4, 24))
# Per-day commit counts from 2023-01-01 up to the same end date.
all_year_until_24 = create_df_weekly(datetime(2023, 1, 1), datetime(2023, 4, 24))

In [None]:
# Print the daily commit counts of both frames as plain lists, one value per day.
print(march_to_april["commits"].tolist())
print(all_year_until_24["commits"].tolist())

[2, 36, 28, 38, 4, 14, 42, 20, 58, 8, 38, 16, 26, 34, 26, 34, 12, 4, 0, 0, 38, 24, 20, 20, 0, 2, 0, 4]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 36, 28, 38, 4, 14, 42, 20, 58, 8, 38, 16, 26, 34, 26, 34, 12, 4, 0, 0, 38, 24, 20, 20, 0, 2, 0, 4]


- Commits por semana

In [None]:
# Ignore the last week in the result as well; it is only there to mark the end of week 5.
start_date = datetime(2023, 3, 26)
end_date = datetime(2023, 4, 30)
commits_per_week = get_commits_per_week(start_date, end_date)
commits_per_week

{'2023-03-26': 108,
 '2023-04-02': 196,
 '2023-04-09': 136,
 '2023-04-16': 104,
 '2023-04-23': 4,
 '2023-04-30': 0}

- Taxa de crescimento por semana

In [None]:
# Since we did not know which of the two forms would be used, the chart is generated with both; this one can be generated as well.

- Commits por mês

In [None]:
commits_per_month = get_commits_per_month(march_to_april)
commits_per_month

{'March': 104, 'April': 444}