In [1]:
import pandas as pd
import datetime
import networkx as nx
import matplotlib.pyplot as plt

In [2]:
# Subreddit names; each one is used as the `{sub}` part of the per-day CSV file names.
list_subs = [
    'bitcoin',
    'bitcoinbeginners',
    'bitcoinmarkets',
    'bitcoinmining',
    'btc',
]

In [3]:
from datetime import datetime, timedelta

# Inclusive bounds of the analysis window.
start_date = datetime.strptime('2022-01-01', '%Y-%m-%d')
end_date = datetime.strptime('2022-12-31', '%Y-%m-%d')

# One entry per calendar day in the window: key is the 'YYYY-MM-DD' string,
# value is a fresh empty list (each day gets its own list object).
date_dict = {
    (start_date + timedelta(days=offset)).strftime('%Y-%m-%d'): []
    for offset in range((end_date - start_date).days + 1)
}

In [4]:
# Load every per-subreddit, per-day network CSV and group the frames by day
# inside `date_dict` (day string -> list of DataFrames).
for sub in list_subs:
    for day, day_frames in date_dict.items():
        csv_path = f'Data/Cleaned_Data/crypto_bitcoin/network_data/{sub}_{day}.csv'
        try:
            df = pd.read_csv(csv_path, parse_dates=['posted_on_x'])
        except FileNotFoundError:
            # A missing file is treated as "no data for this subreddit on this day".
            continue
        except Exception as exc:
            # Still best-effort: an unreadable or malformed file is skipped, but it is
            # reported so silently dropped data cannot skew the daily metrics unnoticed.
            # Catching Exception (not a bare except) lets KeyboardInterrupt stop the loop.
            print(f'Skipping {csv_path}: {exc!r}')
            continue
        day_frames.append(df)

In [5]:
# Day string -> single merged DataFrame for that day (filled by a later cell).
result_dict = dict()

In [6]:
# Merge each day's list of per-subreddit frames into one DataFrame keyed by day.
for key, df_list in date_dict.items():
    if df_list:
        result_dict[key] = pd.concat(df_list, ignore_index=True)
    else:
        # pd.concat([]) raises ValueError ("No objects to concatenate"), so a day for
        # which no CSV was loaded gets an empty frame instead. It carries the two author
        # columns that the graph-building cell selects, and every day keeps its key.
        result_dict[key] = pd.DataFrame(columns=['author_x', 'author_y'])

In [7]:
# Column layout of the per-day graph metrics table.
columns = [
    "Date",
    "Number of Nodes",
    "Number of Edges",
    "Average Degree",
    "Average Clustering Coefficient",
]

# Start from an empty table that already has the final column order.
graph_df = pd.DataFrame(columns=columns)

In [8]:
# Build one directed author graph per day and collect its summary metrics.
# Rows are gathered in a list and turned into `graph_df` once at the end: growing a
# DataFrame with pd.concat inside the loop is quadratic and, starting from an empty
# object-dtype frame, can leave the numeric columns with object dtype.
metric_rows = []
for key, final_df in result_dict.items():
    # Only the two author columns are needed to build the graph.
    conversation_df = final_df[['author_x', 'author_y']]

    # Create a directed graph
    G = nx.DiGraph()

    # Every author seen in either column becomes a node, even when the other side
    # of that row is missing.
    authors_set = set(conversation_df['author_x'].dropna()) | set(conversation_df['author_y'].dropna())
    G.add_nodes_from(authors_set)

    # One edge author_x -> author_y for each row where both authors are present;
    # a DiGraph stores repeated pairs as a single edge.
    complete_pairs = conversation_df.dropna(subset=['author_x', 'author_y'])
    G.add_edges_from(zip(complete_pairs['author_x'], complete_pairs['author_y']))

    # Calculate graph metrics
    num_nodes = G.number_of_nodes()
    num_edges = G.number_of_edges()
    if num_nodes:
        average_degree = sum(dict(G.degree()).values()) / num_nodes
        average_clustering_coefficient = nx.average_clustering(G)
    else:
        # A day with no authors has no defined averages; both computations above
        # would divide by zero, so record NaN instead of crashing.
        average_degree = float('nan')
        average_clustering_coefficient = float('nan')

    metric_rows.append({"Date": key,
                        "Number of Nodes": num_nodes,
                        "Number of Edges": num_edges,
                        "Average Degree": average_degree,
                        "Average Clustering Coefficient": average_clustering_coefficient})

# `columns` fixes the column order and keeps the header even if no day was processed.
graph_df = pd.DataFrame(metric_rows, columns=columns)

In [9]:
# Display the per-day graph metrics table (the cell's last expression is rendered).
graph_df

Unnamed: 0,Date,Number of Nodes,Number of Edges,Average Degree,Average Clustering Coefficient
0,2022-01-01,1430,1633,2.283916,0.001082
1,2022-01-02,649,756,2.329738,0.000633
2,2022-01-03,923,1112,2.409534,0.002355
3,2022-01-04,739,853,2.308525,0.000826
4,2022-01-05,1569,1885,2.402804,0.000872
...,...,...,...,...,...
360,2022-12-27,309,335,2.168285,0.003991
361,2022-12-28,543,653,2.405157,0.003979
362,2022-12-29,320,381,2.381250,0.000899
363,2022-12-30,593,744,2.509275,0.001082


In [11]:
# Save the daily node counts as a two-column time series (Date, Number of Nodes),
# without writing the DataFrame index.
time_series_nodes = graph_df.loc[:, ['Date', 'Number of Nodes']]
time_series_nodes.to_csv(
    "Data/Time_Series/crypto_bitcoin/nodes.csv",
    index=False,
)

In [12]:
# Save the daily edge counts as a two-column time series (Date, Number of Edges),
# without writing the DataFrame index.
time_series_edges = graph_df[['Date', 'Number of Edges']]
# Plain string literal: the path has no placeholders, so an f-string prefix is unneeded.
time_series_edges.to_csv("Data/Time_Series/crypto_bitcoin/edges.csv", index=False)

In [13]:
# Round the average degree to two decimals, overwriting the column in `graph_df`,
# then display the updated table.
graph_df['Average Degree'] = graph_df['Average Degree'].round(2)
graph_df

Unnamed: 0,Date,Number of Nodes,Number of Edges,Average Degree,Average Clustering Coefficient
0,2022-01-01,1430,1633,2.28,0.001082
1,2022-01-02,649,756,2.33,0.000633
2,2022-01-03,923,1112,2.41,0.002355
3,2022-01-04,739,853,2.31,0.000826
4,2022-01-05,1569,1885,2.40,0.000872
...,...,...,...,...,...
360,2022-12-27,309,335,2.17,0.003991
361,2022-12-28,543,653,2.41,0.003979
362,2022-12-29,320,381,2.38,0.000899
363,2022-12-30,593,744,2.51,0.001082


In [15]:
# Save the daily average degree as a two-column time series (Date, Average Degree),
# without writing the DataFrame index.
time_series_degree = graph_df[['Date', 'Average Degree']]
# Plain string literal: the path has no placeholders, so an f-string prefix is unneeded.
time_series_degree.to_csv("Data/Time_Series/crypto_bitcoin/avg_degree.csv", index=False)

In [11]:
# Round the average clustering coefficient to four decimals, overwriting the column
# in `graph_df`, then display the updated table.
graph_df['Average Clustering Coefficient'] = graph_df['Average Clustering Coefficient'].round(4)
graph_df

Unnamed: 0,Date,Number of Nodes,Number of Edges,Average Degree,Average Clustering Coefficient
0,2022-01-01,1430,1633,2.283916,0.0011
1,2022-01-02,649,756,2.329738,0.0006
2,2022-01-03,923,1112,2.409534,0.0024
3,2022-01-04,739,853,2.308525,0.0008
4,2022-01-05,1569,1885,2.402804,0.0009
...,...,...,...,...,...
360,2022-12-27,309,335,2.168285,0.0040
361,2022-12-28,543,653,2.405157,0.0040
362,2022-12-29,320,381,2.381250,0.0009
363,2022-12-30,593,744,2.509275,0.0011


In [14]:
# Save the daily average clustering coefficient as a two-column time series
# (Date, Average Clustering Coefficient), without writing the DataFrame index.
time_series_cc = graph_df[['Date', 'Average Clustering Coefficient']]
# Plain string literal: the path has no placeholders, so an f-string prefix is unneeded.
time_series_cc.to_csv("Data/Time_Series/crypto_bitcoin/avg_cc.csv", index=False)