In [1]:
import ast

import pandas as pd

from system_fns import ArticleEntityAnalysis
from system_fns import average_weighted_clustering_coefficient

In [2]:
# Load the article table and keep only the three columns used below.
df = pd.read_csv('business.csv')
df = df[['date', 'id', 'unique_entities']]
# Reduce timestamps to calendar dates; the column then holds Python `date`
# objects (pandas reports it as dtype `object`).
df['date'] = pd.to_datetime(df['date']).dt.date
# `unique_entities` is stored in the CSV as the text of a Python list.
# ast.literal_eval parses literals only, so a malicious or corrupted cell
# raises ValueError instead of being executed the way eval() would run it.
df['unique_entities'] = df['unique_entities'].map(ast.literal_eval)
df = df.sort_values('date')
df.head()

Unnamed: 0,date,id,unique_entities
816,2023-01-01,2191765,"[Jaime Bautista, Bongbong Marcos, Manuel Tamay..."
815,2023-01-02,2191867,"[Manny Pangilinan, Jaime Bautista]"
814,2023-01-02,2192002,"[Jaime Bautista, Rodrigo Duterte, Manuel Tamay..."
811,2023-01-03,2192961,"[Bongbong Marcos, Rodrigo Duterte]"
813,2023-01-03,2192645,"[Ben Lee, Alidad Tash]"


In [3]:
# Quick sanity summary: article count and the date range covered.
date_column = df['date']
for label, value in (
    ("Number of unique articles:", df['id'].nunique()),
    ("Earliest datetime record:", date_column.min()),
    ("Latest datetime record:", date_column.max()),
):
    print(label, value)
# Last expression: show the column dtypes as the cell's rich output.
df.dtypes

Number of unique articles: 817
Earliest datetime record: 2023-01-01
Latest datetime record: 2023-07-24


date               object
id                  int64
unique_entities    object
dtype: object

In [4]:
# Small trial subset: every article dated on or before 2023-01-05.
cutoff = pd.to_datetime('2023-01-05').date()
trial = df.loc[df['date'] <= cutoff]
trial

Unnamed: 0,date,id,unique_entities
816,2023-01-01,2191765,"[Jaime Bautista, Bongbong Marcos, Manuel Tamay..."
815,2023-01-02,2191867,"[Manny Pangilinan, Jaime Bautista]"
814,2023-01-02,2192002,"[Jaime Bautista, Rodrigo Duterte, Manuel Tamay..."
811,2023-01-03,2192961,"[Bongbong Marcos, Rodrigo Duterte]"
813,2023-01-03,2192645,"[Ben Lee, Alidad Tash]"
812,2023-01-03,2192871,"[Leandro Leviste, Enrique Razon Jr, Manny Pang..."
810,2023-01-04,2193115,"[Joyce Lipa, Joarra Solis, Chuchi Fonacier]"
808,2023-01-05,2194645,"[Jonathan Yan, Kenneth Chow, Lucy Lu]"
807,2023-01-05,2194666,"[Yulia Svyrydenko, Denys Shmyhal, Olena Bilan]"
809,2023-01-05,2193804,"[Bongbong Marcos, Nicholas Mapa, Felipe Medall..."


In [5]:
# Wrap the trial subset in the project-local ArticleEntityAnalysis helper
# (imported from system_fns; what its constructor does is not visible here).
sample = ArticleEntityAnalysis(trial)

In [6]:
# Show the frame held by the analysis object; in the recorded output it has
# the same rows as `trial`.
sample.df

Unnamed: 0,date,id,unique_entities
816,2023-01-01,2191765,"[Jaime Bautista, Bongbong Marcos, Manuel Tamay..."
815,2023-01-02,2191867,"[Manny Pangilinan, Jaime Bautista]"
814,2023-01-02,2192002,"[Jaime Bautista, Rodrigo Duterte, Manuel Tamay..."
811,2023-01-03,2192961,"[Bongbong Marcos, Rodrigo Duterte]"
813,2023-01-03,2192645,"[Ben Lee, Alidad Tash]"
812,2023-01-03,2192871,"[Leandro Leviste, Enrique Razon Jr, Manny Pang..."
810,2023-01-04,2193115,"[Joyce Lipa, Joarra Solis, Chuchi Fonacier]"
808,2023-01-05,2194645,"[Jonathan Yan, Kenneth Chow, Lucy Lu]"
807,2023-01-05,2194666,"[Yulia Svyrydenko, Denys Shmyhal, Olena Bilan]"
809,2023-01-05,2193804,"[Bongbong Marcos, Nicholas Mapa, Felipe Medall..."


In [7]:
# Trial subset, window_panel=1, mean=True: the recorded result is one float.
# Both the method and the metric come from system_fns, so the meaning of
# window_panel is not visible here — presumably the rolling-window length;
# verify against system_fns.
sample.aggregate_rolling_window_analysis(
    function=average_weighted_clustering_coefficient, window_panel=1, mean=True
)

0.17999342082457276

In [8]:
# Trial subset, window_panel=5, mean=True.
# NOTE(review): the recorded result is exactly 0.0 — confirm this is expected
# for a subset that spans only five dates rather than a degenerate window.
sample.aggregate_rolling_window_analysis(
    function=average_weighted_clustering_coefficient, window_panel=5, mean=True
)

0.0

In [9]:
# Trial subset, window_panel=5, mean=False: the recorded result is a list of
# five values instead of a single number.
# NOTE(review): all five recorded values are 0.0 — confirm this is expected.
sample.aggregate_rolling_window_analysis(
    function=average_weighted_clustering_coefficient, window_panel=5, mean=False
)

[0.0, 0.0, 0.0, 0.0, 0.0]

In [10]:
# Trial subset with a (1, 5) tuple for window_panel and `mean` left at its
# default. The recorded output is a 5-step progress bar and five floats whose
# first and last entries equal the window_panel=1 and window_panel=5 results,
# so the tuple is presumably an inclusive range of window sizes — verify
# against system_fns.
sample.aggregate_rolling_window_analysis(
    function=average_weighted_clustering_coefficient, window_panel=(1, 5)
)

  0%|          | 0/5 [00:00<?, ?it/s]

[0.17999342082457276,
 0.11999248094236888,
 0.09332748517739801,
 0.15749013123685915,
 0.0]

In [11]:
# Same analysis wrapper as above, this time over the full sorted article
# table rather than the trial subset.
whole = ArticleEntityAnalysis(df)

In [12]:
# Full dataset, window_panel=1, `mean` not passed: the recorded result is a
# single float, so the default appears to be the averaged form — verify
# against system_fns.
whole.aggregate_rolling_window_analysis(
    function=average_weighted_clustering_coefficient, window_panel=1
)

0.10121812866840822

In [13]:
# Full dataset, window_panel=30, mean=False: the recorded result is a list of
# 30 un-averaged values.
whole.aggregate_rolling_window_analysis(
    function=average_weighted_clustering_coefficient, window_panel=30, mean=False
)

[0.11596613549854871,
 0.11619373382955692,
 0.11628162394599884,
 0.11648748837440204,
 0.11656281199893417,
 0.10003553890959481,
 0.09995446269639698,
 0.0996961712976496,
 0.10028774522705695,
 0.10030855385284403,
 0.10018836806125525,
 0.09999050205037117,
 0.09987957783247962,
 0.09998178478713297,
 0.09999800960337568,
 0.09999800960337568,
 0.09938355737394906,
 0.0996786766892242,
 0.10038083533108866,
 0.10007005882876596,
 0.10082779086853394,
 0.10076894894255466,
 0.10072758130437694,
 0.10109962330987612,
 0.10085298954854643,
 0.10113552483484603,
 0.10183359912691778,
 0.10169240716310751,
 0.10183152126319385,
 0.10200873211504366]

In [14]:
# Full dataset, window_panel=(1, 5), mean=False: the recorded result is a
# nested list with five inner lists of lengths 1 through 5 (presumably one
# inner list per window size — verify against system_fns).
whole.aggregate_rolling_window_analysis(
    function=average_weighted_clustering_coefficient,
    window_panel=(1, 5),
    mean=False,
)

  0%|          | 0/5 [00:00<?, ?it/s]

[[0.10121812866840822],
 [0.10118549597609329, 0.10114048773511715],
 [0.10118549597609329, 0.10110744178351985, 0.10116487018304327],
 [0.10115366610567943,
  0.10110744178351985,
  0.1011326289255776,
  0.10137336118799718],
 [0.10126595217070619,
  0.10107520246835412,
  0.1011326289255776,
  0.10133982994792658,
  0.10149851348576014]]

In [15]:
# Degree analysis over the full dataset with window_size=3, plotting enabled.
# No output was captured for this cell in the export; the method lives in
# system_fns, so what `flat_plot` changes is not visible here.
whole.element_rolling_window_degree_analysis(
    window_size=3, plot=True, flat_plot=True
)

In [16]:
# Degree analysis over the full dataset with window_size=7, plotting enabled.
# No output was captured for this cell in the export; the method lives in
# system_fns, so what `flat_plot` changes is not visible here.
whole.element_rolling_window_degree_analysis(
    window_size=7, plot=True, flat_plot=True
)

In [17]:
# Degree analysis over the full dataset with window_size=30, plotting enabled.
# No output was captured for this cell in the export; the method lives in
# system_fns, so what `flat_plot` changes is not visible here.
whole.element_rolling_window_degree_analysis(
    window_size=30, plot=True, flat_plot=True
)