In [30]:
# Import the libraries used for data, reading JSON, converting to csv
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import json
import csv

In [31]:
# CIG colour palette shared by every chart in this notebook.
# Listed one hex code per line so individual entries are easy to spot and diff.
colors = [
    '#1A3D59',
    '#00548A',
    '#007CBA',
    '#9A9400',
    '#6D3527',
    '#9F3122',
    '#D75E00',
    '#F98D29',
    '#B41782',
    '#EBAA20',
    '#E7417A',
]

In [32]:
# monthlyCountList feeds the Tool Statistics section (Downloads, Redirect Counts).
# The file is read inside a context manager so the handle is closed as soon as
# parsing finishes (the bare open() used before was never closed). JSON text is
# UTF-8, so the encoding is stated explicitly instead of using the platform default.
with open('json/monthlyCountList.json', encoding='utf-8') as f:
    monthly_count_list_data = json.load(f)

In [33]:
# First, the Downloads data stored under the 'world_map_list' key.
# Unlike the month -> count dicts exported elsewhere in this notebook, this entry is
# a list whose elements are dicts: the keys of the first element become the header
# row and the values of every element become one CSV row.
# NOTE(review): assumes every element has the same keys, in the same order, as the
# first one - confirm against the JSON producer.
dat = monthly_count_list_data['world_map_list']

# An empty list simply yields an empty CSV instead of failing on dat[0].
header = dat[0].keys() if dat else []

# Mode 'w' creates the file or truncates an existing one (the csv/ folder must exist).
# newline='' is required by the csv module so it controls line endings itself
# (otherwise blank rows appear on Windows); the with-block closes the file even
# when a write fails.
with open('csv/downloads.csv', 'w', newline='', encoding='utf-8') as data_file:
    csv_writer = csv.writer(data_file)
    if header:
        csv_writer.writerow(header)
    csv_writer.writerows(d.values() for d in dat)

In [34]:
# Downloads map - the figure itself is still to be written (TODO).
# For now only the dataframe is prepared: one row per element of 'world_map_list'.
dat = monthly_count_list_data['world_map_list']
df = pd.DataFrame(data=dat)

In [35]:
# Second, the Monthly Redirects data stored under the 'monthly_count' key.
dat = monthly_count_list_data['monthly_count']

# Column names written as the first CSV row
header = ["month", "redirects"]

# Turn the dict into a list of (key, value) pairs - one pair per CSV row
dat = list(dat.items())

# Mode 'w' creates the file or truncates an existing one (the csv/ folder must exist).
# newline='' lets the csv module control line endings (avoids blank rows on Windows)
# and the with-block guarantees the file is closed even if a write fails.
with open('csv/monthlyRedirects.csv', 'w', newline='', encoding='utf-8') as data_file:
    csv_writer = csv.writer(data_file)
    csv_writer.writerow(header)
    csv_writer.writerows(dat)

In [36]:
# Monthly redirects chart - TODO: the final figure is not implemented yet.
# The (key, value) pairs are read straight from the loaded JSON rather than from
# whatever `dat` happened to hold after the previous cell, so this cell produces
# the same dataframe regardless of which cell ran last.
dat = list(monthly_count_list_data['monthly_count'].items())
df = pd.DataFrame(dat, columns=['date', 'count'])

# Draft histogram (currently disabled)
#fig = px.histogram(df, x='date', y="count")
#fig.show()

In [37]:
# monthlyCommitCountList feeds the Repository Statistics section
# (Lifetime Statistics, Author Contributions, Commits).
# Read inside a context manager so the file handle is released right after parsing;
# the encoding is explicit because JSON text is UTF-8.
with open('json/monthlyCommitCountList.json', encoding='utf-8') as f:
    monthly_commit_count_list_data = json.load(f)

In [38]:
# Lifetime Statistics
# TODO


In [39]:
# Contributors, stored under the 'contributors' key of monthlyCommitCountList.
dat = monthly_commit_count_list_data['contributors']

# Column names written as the first CSV row
header = ["user", "contributions"]

# Turn the dict into a list of (key, value) pairs - one pair per CSV row
dat = list(dat.items())

# Mode 'w' creates the file or truncates an existing one (the csv/ folder must exist).
# newline='' lets the csv module control line endings (avoids blank rows on Windows);
# UTF-8 is explicit so keys containing non-ASCII characters are written the same way
# on every platform; the with-block closes the file even if a write fails.
with open('csv/contributors.csv', 'w', newline='', encoding='utf-8') as data_file:
    csv_writer = csv.writer(data_file)
    csv_writer.writerow(header)
    csv_writer.writerows(dat)

In [40]:
# Build a two-column dataframe (user, contributions) from the raw dict
dat = monthly_commit_count_list_data['contributors']
df = pd.DataFrame(list(dat.items()), columns=['user', 'contributions'])

# One colour per row, cycling through the palette as often as needed.
# The palette length is taken from `colors` itself (it used to be hard-coded as 11),
# so adding or removing a colour no longer requires touching this cell. For an
# 11-colour palette the resulting list is identical to the previous calculation.
pie_colors = [colors[i % len(colors)] for i in range(len(df))]

# Pie chart - slice size is the number of contributions of each user
fig = px.pie(df, values='contributions', names='user', color_discrete_sequence=pie_colors, title='Author Contributions')
# Text inside the slices shows the raw value (not the percentage);
# the hover popup lists user, contributions and percent on separate lines
fig.update_traces(textposition='inside', textinfo='value', hovertemplate = "%{label}<br>%{value}<br>%{percent}")
# Display figure
fig.show()

In [12]:
# Monthly commit counts, stored under the 'monthly_commit_count' key of monthlyCommitCountList.
dat = monthly_commit_count_list_data['monthly_commit_count']

# Column names written as the first CSV row
header = ["month", "commit_count"]

# Turn the dict into a list of (key, value) pairs - one pair per CSV row
dat = list(dat.items())

# Mode 'w' creates the file or truncates an existing one (the csv/ folder must exist).
# newline='' lets the csv module control line endings (avoids blank rows on Windows)
# and the with-block guarantees the file is closed even if a write fails.
with open('csv/monthly_commit_count.csv', 'w', newline='', encoding='utf-8') as data_file:
    csv_writer = csv.writer(data_file)
    csv_writer.writerow(header)
    csv_writer.writerows(dat)

In [13]:
# Two-column dataframe (month, commit_count) built from the raw dict
dat = monthly_commit_count_list_data['monthly_commit_count']
df = pd.DataFrame(dat.items(), columns=['month', 'commit_count'])

# Bar chart: month on the x-axis, commit count on the y-axis.
# The whole palette is handed over; with a single series only its first colour is used.
fig = px.bar(
    df,
    x='month',
    y='commit_count',
    color_discrete_sequence=colors,
    title='Commits',
)
# Hover text reads "(month, commit_count)"
fig.update_traces(hovertemplate="(%{label}, %{value})")
# Render the chart
fig.show()

In [14]:
# monthlyIssueCountList-issues feeds the Repository Statistics section (Issues).
# Read inside a context manager so the file handle is released right after parsing;
# the encoding is explicit because JSON text is UTF-8.
with open('json/monthlyIssueCountList-issues.json', encoding='utf-8') as f:
    monthly_issue_count_list = json.load(f)

In [15]:
# All issues per month, stored under the 'monthly_issues_only_count' key.
dat = monthly_issue_count_list['monthly_issues_only_count']

# Column names written as the first CSV row
header = ["month", "issue_count"]

# Turn the dict into a list of (key, value) pairs - one pair per CSV row
dat = list(dat.items())

# Mode 'w' creates the file or truncates an existing one (the csv/ folder must exist).
# newline='' lets the csv module control line endings (avoids blank rows on Windows)
# and the with-block guarantees the file is closed even if a write fails.
with open('csv/monthly_issues_only_count.csv', 'w', newline='', encoding='utf-8') as data_file:
    csv_writer = csv.writer(data_file)
    csv_writer.writerow(header)
    csv_writer.writerows(dat)

In [16]:
# Closed issues per month, stored under the 'monthly_closed_issues_only_count' key.
dat = monthly_issue_count_list['monthly_closed_issues_only_count']

# Column names written as the first CSV row
header = ["month", "issue_count"]

# Turn the dict into a list of (key, value) pairs - one pair per CSV row
dat = list(dat.items())

# Mode 'w' creates the file or truncates an existing one (the csv/ folder must exist).
# newline='' lets the csv module control line endings (avoids blank rows on Windows)
# and the with-block guarantees the file is closed even if a write fails.
with open('csv/monthly_closed_issues_only_count.csv', 'w', newline='', encoding='utf-8') as data_file:
    csv_writer = csv.writer(data_file)
    csv_writer.writerow(header)
    csv_writer.writerows(dat)

In [17]:
# Open issues per month, stored under the 'monthly_open_issues_only_count' key.
dat = monthly_issue_count_list['monthly_open_issues_only_count']

# Column names written as the first CSV row
header = ["month", "issue_count"]

# Turn the dict into a list of (key, value) pairs - one pair per CSV row
dat = list(dat.items())

# Mode 'w' creates the file or truncates an existing one (the csv/ folder must exist).
# newline='' lets the csv module control line endings (avoids blank rows on Windows)
# and the with-block guarantees the file is closed even if a write fails.
with open('csv/monthly_open_issues_only_count.csv', 'w', newline='', encoding='utf-8') as data_file:
    csv_writer = csv.writer(data_file)
    csv_writer.writerow(header)
    csv_writer.writerows(dat)

In [18]:
# Raw month -> count dicts for all, closed and open issues
month_data = monthly_issue_count_list['monthly_issues_only_count']
closed_data = monthly_issue_count_list['monthly_closed_issues_only_count']
open_data = monthly_issue_count_list['monthly_open_issues_only_count']

# One two-column dataframe per series, all sharing the same column names
issue_columns = ['month', 'issue_count']
df1 = pd.DataFrame(month_data.items(), columns=issue_columns)
df2 = pd.DataFrame(closed_data.items(), columns=issue_columns)
df3 = pd.DataFrame(open_data.items(), columns=issue_columns)
dfs = {"month": df1, "closed": df2, "open": df3}

# Start from an empty titled figure, then add one bar trace per series
fig = go.Figure()
fig.update_layout(title='Issues')
#fig.update_layout({'plot_bgcolor': 'rgba(0, 0, 0, 0)',})

for frame, label, shade in (
    (df1, 'All Issues', colors[0]),
    (df2, 'Closed Issues', colors[2]),
    (df3, 'Open Issues', colors[5]),
):
    fig.add_trace(go.Bar(x=frame["month"], y=frame["issue_count"], name=label, marker_color=shade))
fig.show()

In [19]:
# monthlyIssueCountList-pull feeds the Repository Statistics section (Pull Requests).
# NOTE: this rebinds monthly_issue_count_list, which previously held the issues data;
# cells below this point therefore see the pull-request data.
# Read inside a context manager so the file handle is released right after parsing;
# the encoding is explicit because JSON text is UTF-8.
with open('json/monthlyIssueCountList-pull.json', encoding='utf-8') as f:
    monthly_issue_count_list = json.load(f)

In [20]:
# All pull requests per month, stored under the 'monthly_pull_requests_count' key.
dat = monthly_issue_count_list['monthly_pull_requests_count']

# Column names written as the first CSV row
header = ["month", "pull_count"]

# Turn the dict into a list of (key, value) pairs - one pair per CSV row
dat = list(dat.items())

# Mode 'w' creates the file or truncates an existing one (the csv/ folder must exist).
# newline='' lets the csv module control line endings (avoids blank rows on Windows)
# and the with-block guarantees the file is closed even if a write fails.
with open('csv/monthly_pull_requests_count.csv', 'w', newline='', encoding='utf-8') as data_file:
    csv_writer = csv.writer(data_file)
    csv_writer.writerow(header)
    csv_writer.writerows(dat)

In [21]:
# Closed pull requests per month, stored under the 'monthly_closed_pull_requests_count' key.
dat = monthly_issue_count_list['monthly_closed_pull_requests_count']

# Column names written as the first CSV row
header = ["month", "pull_count"]

# Turn the dict into a list of (key, value) pairs - one pair per CSV row
dat = list(dat.items())

# Mode 'w' creates the file or truncates an existing one (the csv/ folder must exist).
# newline='' lets the csv module control line endings (avoids blank rows on Windows)
# and the with-block guarantees the file is closed even if a write fails.
with open('csv/monthly_closed_pull_requests_count.csv', 'w', newline='', encoding='utf-8') as data_file:
    csv_writer = csv.writer(data_file)
    csv_writer.writerow(header)
    csv_writer.writerows(dat)

In [22]:
# Open pull requests per month, stored under the 'monthly_open_pull_requests_count' key.
dat = monthly_issue_count_list['monthly_open_pull_requests_count']

# Column names written as the first CSV row
header = ["month", "pull_count"]

# Turn the dict into a list of (key, value) pairs - one pair per CSV row
dat = list(dat.items())

# Mode 'w' creates the file or truncates an existing one (the csv/ folder must exist).
# newline='' lets the csv module control line endings (avoids blank rows on Windows)
# and the with-block guarantees the file is closed even if a write fails.
with open('csv/monthly_open_pull_requests_count.csv', 'w', newline='', encoding='utf-8') as data_file:
    csv_writer = csv.writer(data_file)
    csv_writer.writerow(header)
    csv_writer.writerows(dat)

In [23]:
# Raw month -> count dicts for all, closed and open pull requests
month_data = monthly_issue_count_list['monthly_pull_requests_count']
closed_data = monthly_issue_count_list['monthly_closed_pull_requests_count']
open_data = monthly_issue_count_list['monthly_open_pull_requests_count']

# One two-column dataframe per series, all sharing the same column names
pull_columns = ['month', 'pull_count']
df1 = pd.DataFrame(month_data.items(), columns=pull_columns)
df2 = pd.DataFrame(closed_data.items(), columns=pull_columns)
df3 = pd.DataFrame(open_data.items(), columns=pull_columns)
dfs = {"month": df1, "closed": df2, "open": df3}

# Start from an empty titled figure, then add one bar trace per series
fig = go.Figure()
fig.update_layout(title='Pull Requests (PR)')
#fig.update_layout({'plot_bgcolor': 'rgba(0, 0, 0, 0)',})

for frame, label, shade in (
    (df1, 'All PR', colors[0]),
    (df2, 'Closed PR', colors[2]),
    (df3, 'Open PR', colors[5]),
):
    fig.add_trace(go.Bar(x=frame["month"], y=frame["pull_count"], name=label, marker_color=shade))
fig.show()

In [24]:
# TODO: Releases - Asset Downloads

In [25]:
# releasesCountList feeds the Releases - Asset Downloads section.
# Read inside a context manager so the file handle is released right after parsing;
# the encoding is explicit because JSON text is UTF-8.
with open('json/releasesCountList.json', encoding='utf-8') as f:
    releases_count_list = json.load(f)

In [26]:
# Release records stored under the 'releases' key of releasesCountList.
# Like the downloads export, this entry is a list whose elements are dicts: the keys
# of the first element become the header row and the values of every element become
# one CSV row.
# NOTE(review): assumes every element has the same keys, in the same order, as the
# first one - confirm against the JSON producer.
dat = releases_count_list['releases']

# An empty list simply yields an empty CSV instead of failing on dat[0].
header = dat[0].keys() if dat else []

# Mode 'w' creates the file or truncates an existing one (the csv/ folder must exist).
# newline='' is required by the csv module so it controls line endings itself
# (otherwise blank rows appear on Windows); the with-block closes the file even
# when a write fails.
with open('csv/releases_count_list.csv', 'w', newline='', encoding='utf-8') as data_file:
    csv_writer = csv.writer(data_file)
    if header:
        csv_writer.writerow(header)
    csv_writer.writerows(d.values() for d in dat)

In [27]:
# Build the dataframe from the list of release dicts, keeping only the two
# keys the pie chart needs (selected through `columns`)
dat = releases_count_list['releases']
df = pd.DataFrame(dat, columns=['tag_name', 'asset_download_count'])

# One colour per row, cycling through the palette as often as needed.
# The palette length is taken from `colors` itself (it used to be hard-coded as 11),
# so adding or removing a colour no longer requires touching this cell. For an
# 11-colour palette the resulting list is identical to the previous calculation.
pie_colors = [colors[i % len(colors)] for i in range(len(df))]

# Pie chart - slice size is the asset download count of each release tag
fig = px.pie(df, values='asset_download_count', names='tag_name', color_discrete_sequence=pie_colors, title='Releases - Asset Downloads')
# Text inside the slices shows the raw value (not the percentage);
# the hover popup lists version, downloads and percent on separate lines
fig.update_traces(textposition='inside', textinfo='value', hovertemplate = "%{label}<br>%{value}<br>%{percent}")
# Display figure
fig.show()

In [28]:
# Rebuild the dataframe from the list of release dicts, this time selecting
# five keys (via `columns`) instead of the two used for the pie chart
release_columns = ['tag_name', 'author_login', 'release_url', 'created_at', 'asset_download_count']
dat = releases_count_list['releases']
df = pd.DataFrame(dat, columns=release_columns)

In [29]:
# Render four columns of the releases dataframe `df` as a Plotly table.
# Header labels and cell columns are listed in the same order so they line up.
table_headers = ['Release Tag Name', 'Author', 'Creation Date', 'Asset Download Count']
table_columns = [df['tag_name'], df['author_login'], df['created_at'], df['asset_download_count']]

# The stray empty string literal that used to follow 'center' (a typo that only
# worked through implicit string concatenation) and the redundant list([...])
# wrapper have been removed; the rendered table is unchanged.
fig = go.Figure(data=[go.Table(
    header=dict(values=table_headers, align='center'),
    cells=dict(values=table_columns, align='center'),
)])
fig.show()