In [1]:
import ast

import pandas as pd

from system_fns import ArticleEntityAnalysis
from system_fns import average_weighted_clustering_coefficient

In [2]:
# Columns required by the entity analysis below; other CSV columns are not loaded.
ENTITY_COLUMNS = ['date', 'id', 'unique_entities']

# Build the frame in a single chain so that re-running this cell always starts
# from the CSV and never from a half-transformed `df`.
df = (
    pd.read_csv(
        'rappler-business-with-ner-timestep-sentiment-score.csv',
        usecols=ENTITY_COLUMNS,
    )[ENTITY_COLUMNS]  # usecols keeps the file's column order; re-select to fix it
    .assign(
        # Keep only the calendar date (datetime.date objects, object dtype).
        date=lambda frame: pd.to_datetime(frame['date']).dt.date,
        # Each cell is presumably the Python repr of a list of entity names.
        # ast.literal_eval parses literals only, so -- unlike eval() -- it cannot
        # execute code that happens to be embedded in the data file.
        unique_entities=lambda frame: [
            ast.literal_eval(raw) for raw in frame['unique_entities']
        ],
    )
    .sort_values('date')
)
df.head()

Unnamed: 0,date,id,unique_entities
0,2022-01-01,1542619,"[Typhoon Odette, Arthur Yap]"
1,2022-01-01,1543036,"[Marek Drimal, Nureddin Nebati, Tayyip Erdogan]"
2,2022-01-01,1543064,"[Ridwan Jamaludin, Perusahaan Listrik Negara, ..."
3,2022-01-02,1543150,"[Ramon Lopez, Rodrigo Duterte, Typhoon Odette]"
4,2022-01-03,1544290,"[Perusahaan Listrik Negara, Puneet Gupta, Sri ..."


In [3]:
# Sanity-check the loaded corpus: article count and the date span it covers.
corpus_summary = {
    "Number of unique articles:": df['id'].nunique(),
    "Earliest datetime record:": df['date'].min(),
    "Latest datetime record:": df['date'].max(),
}
for summary_label, summary_value in corpus_summary.items():
    print(summary_label, summary_value)

# Last expression: show the column dtypes as the cell output.
df.dtypes

Number of unique articles: 2871
Earliest datetime record: 2022-01-01
Latest datetime record: 2023-07-22


date               object
id                  int64
unique_entities    object
dtype: object

In [4]:
# Small trial subset: every article dated on or before the cutoff day.
cutoff_date = pd.to_datetime('2022-01-05').date()
on_or_before_cutoff = df['date'] <= cutoff_date
trial = df.loc[on_or_before_cutoff]
trial

Unnamed: 0,date,id,unique_entities
0,2022-01-01,1542619,"[Typhoon Odette, Arthur Yap]"
1,2022-01-01,1543036,"[Marek Drimal, Nureddin Nebati, Tayyip Erdogan]"
2,2022-01-01,1543064,"[Ridwan Jamaludin, Perusahaan Listrik Negara, ..."
3,2022-01-02,1543150,"[Ramon Lopez, Rodrigo Duterte, Typhoon Odette]"
4,2022-01-03,1544290,"[Perusahaan Listrik Negara, Puneet Gupta, Sri ..."
5,2022-01-04,1545021,"[Perusahaan Listrik Negara, Sabrin Chowdhury, ..."
6,2022-01-05,1545089,"[Rodrigo Duterte, Benjamin Diokno, Karl Chua, ..."
7,2022-01-05,1545338,"[Jim Cain, Mary Barra, Randy Parker, Jack Hollis]"
8,2022-01-05,1545375,"[Kristalina Georgieva, Gerry Rice, Joe Biden]"
9,2022-01-05,1545834,"[Perusahaan Listrik Negara, Erick Thohir, Muha..."


In [5]:
# Wrap the small trial subset in the analysis helper imported from system_fns,
# so the rolling-window calls can be tried on a few days of data first.
sample = ArticleEntityAnalysis(trial)

In [6]:
# Display the frame exposed by the analysis object as its `df` attribute
# (the output below matches the `trial` subset that was passed in).
sample.df

Unnamed: 0,date,id,unique_entities
0,2022-01-01,1542619,"[Typhoon Odette, Arthur Yap]"
1,2022-01-01,1543036,"[Marek Drimal, Nureddin Nebati, Tayyip Erdogan]"
2,2022-01-01,1543064,"[Ridwan Jamaludin, Perusahaan Listrik Negara, ..."
3,2022-01-02,1543150,"[Ramon Lopez, Rodrigo Duterte, Typhoon Odette]"
4,2022-01-03,1544290,"[Perusahaan Listrik Negara, Puneet Gupta, Sri ..."
5,2022-01-04,1545021,"[Perusahaan Listrik Negara, Sabrin Chowdhury, ..."
6,2022-01-05,1545089,"[Rodrigo Duterte, Benjamin Diokno, Karl Chua, ..."
7,2022-01-05,1545338,"[Jim Cain, Mary Barra, Randy Parker, Jack Hollis]"
8,2022-01-05,1545375,"[Kristalina Georgieva, Gerry Rice, Joe Biden]"
9,2022-01-05,1545834,"[Perusahaan Listrik Negara, Erick Thohir, Muha..."


In [7]:
# Trial subset, window_panel=1 with mean=True: the call yields a single score.
trial_cc_w1_mean = sample.aggregate_rolling_window_analysis(
    window_panel=1,
    mean=True,
    function=average_weighted_clustering_coefficient,
)
trial_cc_w1_mean

0.18944000470314598

In [8]:
# Trial subset, window_panel=5 with mean=True: again reduced to one score.
trial_cc_w5_mean = sample.aggregate_rolling_window_analysis(
    window_panel=5,
    mean=True,
    function=average_weighted_clustering_coefficient,
)
trial_cc_w5_mean

0.0

In [9]:
# Same window_panel=5 run, but mean=False keeps the individual values
# instead of collapsing them into their average.
trial_cc_w5_values = sample.aggregate_rolling_window_analysis(
    window_panel=5,
    mean=False,
    function=average_weighted_clustering_coefficient,
)
trial_cc_w5_values

[0.0, 0.0, 0.0, 0.0, 0.0]

In [10]:
# Passing a (1, 5) tuple as window_panel sweeps several window settings in one
# call; `mean` is left at its default here.
trial_cc_w1_to_5 = sample.aggregate_rolling_window_analysis(
    window_panel=(1, 5),
    function=average_weighted_clustering_coefficient,
)
trial_cc_w1_to_5

  0%|          | 0/5 [00:00<?, ?it/s]

[0.18944000470314598, 0.2632767445114505, 0.10902040405269826, 0.0, 0.0]

In [11]:
# Same analysis helper as for the trial run, now built on the full frame `df`.
whole = ArticleEntityAnalysis(df)

In [12]:
# Full corpus, window_panel=1; `mean` is left at its default.
whole_cc_w1 = whole.aggregate_rolling_window_analysis(
    window_panel=1,
    function=average_weighted_clustering_coefficient,
)
whole_cc_w1

0.05565409461143574

In [13]:
# Full corpus, window_panel=30 with mean=False: keep the individual values
# rather than their average.
whole_cc_w30_values = whole.aggregate_rolling_window_analysis(
    window_panel=30,
    mean=False,
    function=average_weighted_clustering_coefficient,
)
whole_cc_w30_values

[0.0553089616468212,
 0.055308637771376574,
 0.055306505125789676,
 0.055346566498120205,
 0.055464596560533236,
 0.05546432931465251,
 0.05552205456621853,
 0.05558781412476494,
 0.05550094413213703,
 0.05553605823315546,
 0.05550735095328261,
 0.055510484300419764,
 0.05552680830649795,
 0.0556193689293721,
 0.05561913362610217,
 0.05561913362610217,
 0.05561913362610217,
 0.05560530731434977,
 0.055644112365016964,
 0.05555858489625754,
 0.055536996774035875,
 0.05556760200311406,
 0.05558747956520915,
 0.05558747956520915,
 0.05565468430902367,
 0.055667190758601916,
 0.055774243743527456,
 0.05590011759595366,
 0.055847231955744925,
 0.05584471652358]

In [14]:
# Full corpus, window_panel=(1, 5) sweep with mean=False: the un-averaged
# values are kept for each window setting in the sweep.
whole_cc_w1_to_5_values = whole.aggregate_rolling_window_analysis(
    window_panel=(1, 5),
    mean=False,
    function=average_weighted_clustering_coefficient,
)
whole_cc_w1_to_5_values

  0%|          | 0/5 [00:00<?, ?it/s]

[[0.05565409461143574],
 [0.05564944354448933, 0.055654011018121674],
 [0.055663641352023646, 0.055649360653976986, 0.055651821095077984],
 [0.0556710869317348,
  0.055663900527064,
  0.0556472483531164,
  0.05562641261863767],
 [0.05558230562896801,
  0.055671354180217104,
  0.055661792563924964,
  0.055621829412027424,
  0.055609232214344634]]

In [15]:
# Run the per-element rolling-window degree analysis on the full corpus with
# plotting switched on (plot=True).
# NOTE(review): the captured text output is only a stray `results[value].sort()`
# line, which looks like the tail of a warning raised inside system_fns --
# confirm and fix it at the source rather than leaving it in the notebook.
whole.element_rolling_window_degree_analysis(plot=True)

  results[value].sort()
