In [6]:
import numpy as np
import pandas as pd
from statsmodels.tsa.arima_process import ArmaProcess
import plotly.express as px

from datetime import datetime
from dateutil.relativedelta import relativedelta
import datetime

from utiles import *

# Import data

In [70]:
# Load the wide table: one row per event (indexed by event name); the last two
# columns are 'Category' and 'Event_first_revision_date'.
# Rows whose first-revision date is the placeholder string "0" are filtered with a
# boolean mask rather than drop(<index labels>): dropping by label would also remove
# valid rows that merely share an event name with a placeholder row.
data = (
    pd.read_csv("data.csv")
    .set_index('Unnamed: 0')
    .loc[lambda df: df['Event_first_revision_date'] != "0"]
    .assign(Event_first_revision_date=lambda df: pd.to_datetime(df['Event_first_revision_date']))
)

# events_df: one row per event with its category and first-revision date.
# .copy() makes it an independent frame, so adding columns below does not raise
# SettingWithCopyWarning or depend on whether the slice is a view of `data`.
events_df = data.iloc[:, -2:].copy()  # 'Category' and 'Event_first_revision_date'
# Calendar date (no time-of-day) of the first revision; used as the join key against peak dates.
events_df['Main_Event_date'] = [ts.date() for ts in pd.to_datetime(events_df['Event_first_revision_date'])]
# Event name as a regular column so it survives merges that discard the index.
events_df['Main Event'] = events_df.index

In [71]:
events_df

Unnamed: 0_level_0,Category,Event_first_revision_date,Main_Event_date,Main Event
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Firestar,Fictional_characters,2006-10-27 22:02:07+00:00,2006-10-27,Firestar
Dark Avengers,Fictional_characters,2008-12-08 22:34:36+00:00,2008-12-08,Dark Avengers
David North,Fictional_characters,2008-05-12 16:05:29+00:00,2008-05-12,David North
Titania,Fictional_characters,2003-05-01 16:50:44+00:00,2003-05-01,Titania
Human Torch,Fictional_characters,2003-05-28 04:43:47+00:00,2003-05-28,Human Torch
...,...,...,...,...
2002 Mindanao earthquake,Earthquakes,2010-05-29 20:43:00+00:00,2010-05-29,2002 Mindanao earthquake
2002 Sumatra earthquake,Earthquakes,2010-11-29 09:44:40+00:00,2010-11-29,2002 Sumatra earthquake
2003 Alabama earthquake,Earthquakes,2006-05-10 01:04:53+00:00,2006-05-10,2003 Alabama earthquake
2003 Coquimbo earthquake,Earthquakes,2011-02-02 02:41:34+00:00,2011-02-02,2003 Coquimbo earthquake


In [8]:
data.Category.value_counts()

Fictional_characters    492
train_accident          475
air_accidents           471
Movies                  448
Earthquakes             446
Actors                  434
Billionaires            326
Capitals                203
Countries               197
Universities            144
Volcano_eruptions        31
Hurricanes               11
Name: Category, dtype: int64

In [84]:
# Per-category result stores, keyed by category name and filled by the driver loop:
#   dic_peak_timestamps        -> peak table of each category
#   dic_Impacting_event_OnPeak -> per-event summary of co-occurring first revisions
#   dic_inpact                 -> one row per (peak, co-occurring event) match
dic_peak_timestamps = dict()
dic_Impacting_event_OnPeak = dict()
dic_inpact = dict()

def aniv_Cat(cat,smoothing_window):
    """Relate the page-view peaks of one category to other events' first-revision dates.

    Reads the module-level frames ``data`` and ``events_df`` and calls ``ts2peak``
    (star-imported from ``utiles``; its internals are not visible here).

    Parameters
    ----------
    cat : str
        Value of ``data['Category']`` whose events are analysed.
    smoothing_window : int
        Passed straight through to ``ts2peak``.

    Returns
    -------
    df_peak_timestamps : DataFrame
        Output of ``ts2peak`` (expected to carry at least 'Timestamps', 'event' and
        'prominences') plus 'Peak date' (calendar date of the peak) and
        'First Event Aniv' (first-revision date of the peaking event; NaN if the
        event is unknown to ``events_df``).
    inpact_df : DataFrame
        One row per (peak, event) pair where the event's first-revision date equals
        the peak date. Columns: 'Event', 'Peak Date', 'Peak Prominance',
        'Impacting Event', 'Impacting Event Category', 'Impacting Event View count'.
    Impacting_event_OnPeak_df : DataFrame
        Indexed by 'Event'. 'Peak count' is the number of peaks; the
        '... impacting View count' columns hold the NUMBER of matched impacting events
        (overall and per impacting category), not page views, despite their names.
    """
    # Wide -> long: after the transpose, rows are dates and columns are events.
    # The last three columns of `data` are dropped as non-date metadata.
    df = data[data["Category"] == cat].iloc[:, :-3].T
    df.index = pd.to_datetime(df.index)

    print('working on df_peak_timestamps of ', cat)
    df_peak_timestamps = ts2peak(df, smoothing_window)
    df_peak_timestamps['Peak date'] = [d.date() for d in df_peak_timestamps['Timestamps']]

    # Build the event -> first-revision-date lookup once instead of filtering events_df
    # for every peak. keep='first' mirrors the original `.values[0]` choice when an
    # event name occurs more than once.
    first_date_by_event = (
        events_df.drop_duplicates(subset='Main Event', keep='first')
        .set_index('Main Event')['Main_Event_date']
    )
    df_peak_timestamps['First Event Aniv'] = df_peak_timestamps['event'].map(first_date_by_event)

    # For each peak, find every event whose first-revision date falls on the peak date.
    # A left merge followed by the not-null filter keeps the merged row labels, which is
    # why inpact_df's index has gaps.
    print('working on inpact_df of ', cat)
    s1 = df_peak_timestamps.merge(events_df, left_on='Peak date', right_on='Main_Event_date', how='left')
    s1 = s1[s1['Main Event'].notna()]

    inpact_df = pd.DataFrame()
    inpact_df['Event'] = s1['event']
    inpact_df['Peak Date'] = pd.to_datetime(s1['Timestamps'])
    inpact_df['Peak Prominance'] = s1['prominences']
    inpact_df['Impacting Event'] = s1['Main Event']
    inpact_df['Impacting Event Category'] = s1['Category']
    # Page views of the impacting event on the peak day; `data` columns are addressed by
    # the string form of the timestamp.
    inpact_df['Impacting Event View count'] = [
        data[str(dt)][ev]
        for dt, ev in zip(inpact_df['Peak Date'], inpact_df['Impacting Event'])
    ]

    # Per-event summary: number of peaks, number of matched impacting events overall,
    # and number of matched impacting events per impacting category.
    print('working on Impacting_event_OnPeak_df of ', cat)
    Impacting_event_OnPeak_df = (
        df_peak_timestamps['event'].value_counts().rename_axis('Event').to_frame('Peak count')
    )

    # Index-aligned assignment: events without any match get NaN here and 0 after fillna.
    Impacting_event_OnPeak_df['All impacting View count'] = inpact_df['Event'].value_counts()

    # A dedicated loop name keeps the `cat` argument intact for the whole function.
    for impacting_cat in s1['Category'].dropna().unique():
        in_category = inpact_df['Impacting Event Category'] == impacting_cat
        Impacting_event_OnPeak_df[impacting_cat + " impacting View count"] = (
            inpact_df.loc[in_category, 'Event'].value_counts()
        )

    Impacting_event_OnPeak_df = Impacting_event_OnPeak_df.fillna(0)

    return df_peak_timestamps, inpact_df, Impacting_event_OnPeak_df



# Run the impact analysis for every category and file the three result frames under
# the category name. Note: the last iteration's frames stay bound to the global names
# df_peak_timestamps / inpact_df / Impacting_event_OnPeak_df.
for cat in data.Category.unique():
    print('---------------------------------')
    print('Impact analysis on the category: ', cat)

    results = aniv_Cat(cat, 7)  # 7 = smoothing window handed to ts2peak
    df_peak_timestamps, inpact_df, Impacting_event_OnPeak_df = results

    key = f"{cat}"
    stores = (dic_peak_timestamps, dic_inpact, dic_Impacting_event_OnPeak)
    for store, frame in zip(stores, results):
        store[key] = frame

---------------------------------
Impact analysis on the category:  Fictional_characters
working on df_peak_timestamps of  Fictional_characters
working on inpact_df of  Fictional_characters
working on Impacting_event_OnPeak_df of  Fictional_characters
---------------------------------
Impact analysis on the category:  Actors
working on df_peak_timestamps of  Actors
working on inpact_df of  Actors
working on Impacting_event_OnPeak_df of  Actors
---------------------------------
Impact analysis on the category:  Movies
working on df_peak_timestamps of  Movies
working on inpact_df of  Movies
working on Impacting_event_OnPeak_df of  Movies
---------------------------------
Impact analysis on the category:  Countries
working on df_peak_timestamps of  Countries
working on inpact_df of  Countries
working on Impacting_event_OnPeak_df of  Countries
---------------------------------
Impact analysis on the category:  Capitals
working on df_peak_timestamps of  Capitals
working on inpact_df of  Cap

In [85]:
dic_peak_timestamps

{'Fictional_characters':     Timestamps    right_ips     left_ips  right_bases  left_bases  \
 0   2015-07-29    26.186567    15.571875           29           0   
 1   2015-08-18    56.950980    32.855263           69           0   
 2   2015-09-24    83.133921    74.951937         2523           0   
 3   2015-10-29   116.296748   109.395652          133         104   
 4   2015-12-01   163.734177   139.234375          171         133   
 ..         ...          ...          ...          ...         ...   
 251 2022-05-17  2509.539683  2505.021739         2525        2504   
 252 2022-06-12  2534.208824  2527.622881         2538        2525   
 253 2022-06-27  2553.351471  2546.265000         2554        2539   
 254 2022-07-08  2562.648855  2555.971631         2565        2554   
 255 2022-07-22  2573.579882  2566.864524         2576        2343   
 
      prominences        Value     widths  width_heights       days  months  \
 0      99.000000   556.142857  10.614692     506.64285

In [86]:
dic_inpact

{'Fictional_characters':               Event  Peak Date  Peak Prominance  \
 1          Firestar 2015-08-18       166.428571   
 4          Firestar 2015-12-01       124.571429   
 9          Firestar 2016-04-01        49.285714   
 27         Firestar 2017-04-11       118.571429   
 29         Firestar 2017-05-09        10.428571   
 ...             ...        ...              ...   
 54769  Hope Summers 2021-10-23       214.714286   
 54770  Hope Summers 2021-10-31        12.000000   
 54776  Hope Summers 2022-01-15        11.142857   
 54793  Hope Summers 2022-06-27        63.000000   
 54794  Hope Summers 2022-06-27        63.000000   
 
                                          Impacting Event  \
 1                           Ang Sugo: The Last Messenger   
 4                             Eruption of Mount Tarawera   
 9                                            The Culling   
 27                            Elsterwerda train disaster   
 29                                 Genthin r

In [89]:
dic_inpact['Fictional_characters']

Unnamed: 0,Event,Peak Date,Peak Prominance,Impacting Event,Impacting Event Category,Impacting Event View count
1,Firestar,2015-08-18,166.428571,Ang Sugo: The Last Messenger,Movies,425.0
4,Firestar,2015-12-01,124.571429,Eruption of Mount Tarawera,Volcano_eruptions,12.0
9,Firestar,2016-04-01,49.285714,The Culling,Movies,281.0
27,Firestar,2017-04-11,118.571429,Elsterwerda train disaster,train_accident,2.0
29,Firestar,2017-05-09,10.428571,Genthin rail disaster,train_accident,0.0
...,...,...,...,...,...,...
54769,Hope Summers,2021-10-23,214.714286,1933 United Airlines Boeing 247 mid-air explosion,air_accidents,49.0
54770,Hope Summers,2021-10-31,12.000000,2021 Salisbury rail crash,train_accident,1158.0
54776,Hope Summers,2022-01-15,11.142857,Empire State University,Fictional_characters,3.0
54793,Hope Summers,2022-06-27,63.000000,UM Airlines Flight 4230,air_accidents,14.0


In [87]:
dic_Impacting_event_OnPeak

{'Fictional_characters':                   Peak count  All impacting View count  \
 Event                                                    
 Petra                    363                      49.0   
 Hercules                 361                      66.0   
 Wolverine                312                      43.0   
 Generation X             307                      43.0   
 Deadpool                 303                      48.0   
 ...                      ...                       ...   
 Will o' the Wisp           1                       0.0   
 Radioactive Man            1                       0.0   
 Defensor                   1                       0.0   
 Battering Ram              1                       0.0   
 John Steele                1                       0.0   
 
                   Movies impacting View count  \
 Event                                           
 Petra                                     7.0   
 Hercules                                  5.0   
 Wolver

In [88]:
dic_Impacting_event_OnPeak['Fictional_characters']

Unnamed: 0_level_0,Peak count,All impacting View count,Movies impacting View count,Volcano_eruptions impacting View count,train_accident impacting View count,Actors impacting View count,Billionaires impacting View count,Earthquakes impacting View count,Hurricanes impacting View count,air_accidents impacting View count,Fictional_characters impacting View count,Capitals impacting View count,Universities impacting View count
Event,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Petra,363,49.0,7.0,3.0,13.0,4.0,7.0,3.0,1.0,8.0,2.0,0.0,1.0
Hercules,361,66.0,5.0,4.0,18.0,2.0,8.0,6.0,2.0,18.0,2.0,0.0,1.0
Wolverine,312,43.0,6.0,2.0,13.0,0.0,3.0,9.0,2.0,8.0,0.0,0.0,0.0
Generation X,307,43.0,6.0,1.0,10.0,2.0,2.0,9.0,0.0,12.0,1.0,0.0,0.0
Deadpool,303,48.0,7.0,1.0,16.0,2.0,7.0,2.0,0.0,10.0,2.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
Will o' the Wisp,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Radioactive Man,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Defensor,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Battering Ram,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Impacting events as a percentage (%) of each category

In [94]:
# Work on the per-event summary of the 'Fictional_characters' category.
# This binds the SAME DataFrame object that is stored in the dict (no copy), so
# columns assigned to Impacting_event_OnPeak_df afterwards also appear in
# dic_Impacting_event_OnPeak['Fictional_characters'].
Impacting_event_OnPeak_df=dic_Impacting_event_OnPeak['Fictional_characters']

In [95]:
# Express each "impacting" count as a percentage of the number of events available
# in the corresponding category (and, for the 'all' column, of all events).
# The category order below fixes the order of the resulting 'Prc of ...' columns.
# 'Countries' is not part of this list, exactly as in the original cell.
PRC_CATEGORIES = [
    'Movies',
    'air_accidents',
    'Billionaires',
    'Earthquakes',
    'train_accident',
    'Universities',
    'Actors',
    'Fictional_characters',
    'Volcano_eruptions',
    'Hurricanes',
    'Capitals',
]

# Number of events per category, computed once instead of once per column.
category_sizes = data.Category.value_counts()

Impacting_event_OnPeak_df['Prc of all View count'] = (
    Impacting_event_OnPeak_df['All impacting View count'] / category_sizes.sum()
) * 100

for category in PRC_CATEGORIES:
    count_col = category + ' impacting View count'
    prc_col = 'Prc of ' + category + ' View count'
    if count_col in Impacting_event_OnPeak_df.columns:
        Impacting_event_OnPeak_df[prc_col] = (
            Impacting_event_OnPeak_df[count_col] / category_sizes[category]
        ) * 100
    else:
        # The count column only exists when at least one event of that category matched
        # a peak; a missing column therefore means zero matches for every event.
        Impacting_event_OnPeak_df[prc_col] = 0.0

In [96]:
# Average every percentage column over all events of the category.
# Columns are selected by their 'Prc of ' prefix rather than by position, so the
# result stays correct when the number of count columns is not exactly 13.
Impacting_event_OnPeak_df.filter(regex=r'^Prc of ').mean().to_frame()

Unnamed: 0,0
Prc of all View count,0.487747
Prc of Movies View count,0.563172
Prc of air_accidents View count,0.719559
Prc of Billionaires View count,0.734785
Prc of Earthquakes View count,0.471415
Prc of train_accident View count,0.977758
Prc of Universities View count,0.168526
Prc of Actors View count,0.163411
Prc of Fictional_characters View count,0.178164
Prc of Volcano_eruptions View count,2.206776


# Anniversaries effect

Now it is time to investigate the effect of anniversaries on the event itself. For this, we look at the peaks that occurred on the event's anniversary dates.

In [277]:
df_peak_timestamps[['event','Timestamps','prominences','First Event Aniv']]

Unnamed: 0,event,Timestamps,prominences,First Event Aniv
0,China Eastern Airlines Flight 5210,2015-11-12,13.785714,2005-01-14
1,China Eastern Airlines Flight 5210,2016-01-15,18.285714,2005-01-14
2,China Eastern Airlines Flight 5210,2016-08-04,11.928571,2005-01-14
3,China Eastern Airlines Flight 5210,2016-12-12,10.214286,2005-01-14
4,China Eastern Airlines Flight 5210,2017-01-15,16.357143,2005-01-14
...,...,...,...,...
35,Southwest Airlines Flight 345,2021-10-10,867.357143,2013-07-23
36,Southwest Airlines Flight 345,2021-11-30,13.785714,2013-07-23
37,Southwest Airlines Flight 345,2022-02-08,22.785714,2013-07-23
38,Southwest Airlines Flight 345,2022-04-03,26.714286,2013-07-23


In [90]:
def _is_anniversary(peak_ts, first_date, max_years=50):
    """Return True when the peak falls on a yearly recurrence of ``first_date``.

    peak_ts    : timestamp of the peak (anything ``pd.Timestamp`` accepts).
    first_date : ``datetime.date`` of the event's first revision.
    max_years  : recurrences 0 .. max_years-1 are considered (year 0 is the date itself).

    Both sides are compared as plain calendar dates. Comparing a pandas Timestamp with
    a ``datetime.date`` directly is unreliable: pandas 2.x treats the two as unequal.
    """
    if pd.isna(first_date):
        return False
    peak_day = pd.Timestamp(peak_ts).date()
    years = peak_day.year - first_date.year
    if not 0 <= years < max_years:
        return False
    # relativedelta clamps 29 February to 28 February in non-leap years.
    return first_date + relativedelta(years=years) == peak_day


# Flag every peak that lands on an anniversary of its own event's first-revision date.
df_peak_timestamps['Anniversary'] = [
    _is_anniversary(peak_ts, first_date)
    for peak_ts, first_date in zip(df_peak_timestamps['Timestamps'], df_peak_timestamps['First Event Aniv'])
]

In [91]:
# Keep only the peaks flagged as anniversaries, with the columns needed for reporting.
anniversary_mask = df_peak_timestamps['Anniversary'].eq(True)
anniversary_columns = ['event', 'Timestamps', 'prominences', 'First Event Aniv', 'Anniversary']
Event_peak_aniv = df_peak_timestamps.loc[anniversary_mask, anniversary_columns]

In [92]:
# Peaks per event (all peaks, and anniversary peaks only), side by side.
peak = df_peak_timestamps.groupby(['event'], as_index=False).size()
ani = Event_peak_aniv.groupby(['event'], as_index=False).size()

# Left merge keeps events without any anniversary peak; their missing count becomes 0.
# The merge suffixes the two 'size' columns as size_x (all peaks) / size_y (anniversary peaks).
anniversary_df = (
    peak.merge(ani, how='left', on=['event'])
    .fillna(0)
    .rename(columns={
        'size_x': 'number of peaks',
        'size_y': 'number of peaks on anniversaries',
    })
)

In [93]:
anniversary_df.sort_values(by='number of peaks on anniversaries', ascending=False)

Unnamed: 0,event,number of peaks,number of peaks on anniversaries
98,1902 Guatemala earthquake,15,3.0
359,2011 Sikkim earthquake,88,3.0
378,2015 Illapel earthquake,74,2.0
339,2009 L'Aquila earthquake,120,2.0
326,2008 Iceland earthquake,28,2.0
...,...,...,...
135,1925 Santa Barbara earthquake,39,0.0
134,1925 Dali earthquake,6,0.0
133,1925 Charlevoix–Kamouraska earthquake,18,0.0
132,1924 Pasinler earthquake,3,0.0


The question here is: are there peaks that occurred on the anniversary of the event?
- Answer:
- 142 of the 25822 peaks (0.5%), coming from 56 events, coincide with anniversary dates.
- There are 224 (56*4) anniversaries for these 56 events; 142 of them have peaks: 63.39%.

## test on one cat

In [72]:
# Single-category test frame: daily series of the 'air_accidents' events.
# After the transpose, rows are dates and columns are events; the last three
# columns of `data` are dropped as non-date metadata.
air_accidents_df = data[data["Category"] == "air_accidents"].iloc[:, :-3].T
air_accidents_df.index = pd.to_datetime(air_accidents_df.index)

# Historical name kept as an alias so the cells that still use it keep working,
# even though the frame holds air accidents rather than fictional characters.
Fictional_characters_df = air_accidents_df
air_accidents_df

Unnamed: 0,American Airlines Flight 11,United Airlines Flight 175,Japan Airlines Flight 123,Sita Air Flight 601,China Eastern Airlines Flight 5210,LOT Polish Airlines Flight 5055,Gulf Air Flight 072,Garuda Indonesia Flight 865,LAPA Flight 3142,1977 Convair CV-300 crash,...,1935 SABENA Savoia-Marchetti S.73 crash,1938 Kyeema crash,1938 Yosemite TWA crash,Malaysia Airlines Flight 17,American Flyers Airline Flight 280,"An-24 incident at Gambell, Alaska",Ansett-ANA Flight 149,Ansett New Zealand Flight 703,Ariana Afghan Airlines Flight 701,United Express Flight 6291
2015-07-01,1289.0,799.0,1044.0,10.0,22.0,23.0,78.0,35.0,37.0,537.0,...,4.0,18.0,17.0,2500.0,0.0,12.0,14.0,29.0,18.0,0.0
2015-07-02,1410.0,843.0,1179.0,22.0,27.0,31.0,50.0,37.0,55.0,544.0,...,8.0,13.0,11.0,2705.0,0.0,6.0,14.0,24.0,18.0,0.0
2015-07-03,1409.0,930.0,1164.0,20.0,33.0,11.0,59.0,44.0,36.0,697.0,...,4.0,14.0,14.0,3493.0,0.0,9.0,23.0,43.0,19.0,0.0
2015-07-04,1292.0,870.0,861.0,18.0,29.0,31.0,46.0,34.0,40.0,818.0,...,6.0,6.0,7.0,2106.0,0.0,8.0,9.0,54.0,27.0,0.0
2015-07-05,1502.0,905.0,892.0,14.0,31.0,39.0,49.0,32.0,53.0,828.0,...,9.0,15.0,10.0,2302.0,0.0,12.0,26.0,27.0,38.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-07-27,2501.0,1488.0,390.0,13.0,111.0,85.0,102.0,28.0,63.0,3.0,...,3.0,14.0,0.0,2786.0,0.0,5.0,12.0,91.0,39.0,92.0
2022-07-28,2433.0,1500.0,776.0,19.0,123.0,136.0,124.0,29.0,91.0,2.0,...,4.0,6.0,1.0,2430.0,0.0,6.0,11.0,131.0,42.0,114.0
2022-07-29,2252.0,1403.0,488.0,12.0,93.0,112.0,89.0,30.0,499.0,4.0,...,10.0,8.0,1.0,2168.0,0.0,2.0,12.0,115.0,43.0,110.0
2022-07-30,2356.0,1298.0,428.0,13.0,153.0,115.0,91.0,19.0,203.0,4.0,...,8.0,9.0,1.0,2182.0,0.0,6.0,8.0,70.0,27.0,67.0


In [10]:
#working om events with more than 250 views in mean
#df=Fictional_characters_df[Fictional_characters_df.columns[Fictional_characters_df.mean()>250]]#.iloc[:,:-43]

In [73]:
# Detect peaks for the test category (smoothing window 14) and attach calendar dates.
df_peak_timestamps = ts2peak(Fictional_characters_df, 14)
df_peak_timestamps['Peak date'] = [d.date() for d in df_peak_timestamps['Timestamps']]

# Add the first-revision date of each peaking event.
# The lookup table is built once instead of filtering events_df for every peak;
# keep='first' matches the original `.values[0]` choice for duplicated event names.
first_date_by_event = (
    events_df.drop_duplicates(subset='Main Event', keep='first')
    .set_index('Main Event')['Main_Event_date']
)
df_peak_timestamps['First Event Aniv'] = df_peak_timestamps['event'].map(first_date_by_event)
df_peak_timestamps

Unnamed: 0,Timestamps,right_ips,left_ips,right_bases,left_bases,prominences,Value,widths,width_heights,days,months,year,event,distance_2_peak,Peak date
0,2015-07-19,7.059783,0.651620,15,0,40.214286,1505.857143,6.408162,1485.750000,Sunday,7.0,2015.0,American Airlines Flight 11,NaT,2015-07-19
1,2015-08-07,24.600503,23.552434,26,15,17.071429,1541.928571,1.048068,1533.392857,Friday,8.0,2015.0,American Airlines Flight 11,19 days,2015-08-07
2,2015-08-28,47.907692,42.675579,51,15,76.428571,1825.500000,5.232113,1787.285714,Friday,8.0,2015.0,American Airlines Flight 11,21 days,2015-08-28
3,2015-09-21,73.260762,59.009492,171,15,7375.714286,8827.000000,14.251270,5139.142857,Monday,9.0,2015.0,American Airlines Flight 11,24 days,2015-09-21
4,2015-11-25,138.792758,124.446286,171,108,719.214286,2034.428571,14.346471,1674.821429,Wednesday,11.0,2015.0,American Airlines Flight 11,65 days,2015-11-25
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64,2022-05-05,2488.824324,2486.420699,2502,2480,30.785714,158.714286,2.403625,143.321429,Thursday,5.0,2022.0,United Express Flight 6291,9 days,2022-05-05
65,2022-06-07,2520.539062,2519.178571,2532,2502,19.714286,139.785714,1.360491,129.928571,Tuesday,6.0,2022.0,United Express Flight 6291,33 days,2022-06-07
66,2022-06-14,2531.426316,2524.963415,2532,2521,12.714286,134.214286,6.462901,127.857143,Tuesday,6.0,2022.0,United Express Flight 6291,7 days,2022-06-14
67,2022-06-21,2538.111111,2532.809451,2540,2502,20.142857,168.071429,5.301660,158.000000,Tuesday,6.0,2022.0,United Express Flight 6291,7 days,2022-06-21


In [75]:
# Start the per-event summary: number of detected peaks per event,
# indexed by event name ('Event'), most peaks first.
peaks_per_event = df_peak_timestamps.event.value_counts()
Impacting_event_OnPeak_df = peaks_per_event.rename_axis('Event').to_frame('Peak count')
Impacting_event_OnPeak_df

Unnamed: 0_level_0,Peak count
Event,Unnamed: 1_level_1
Hindenburg disaster,305
Space Shuttle Challenger disaster,255
Pan Am Flight 103,237
US Airways Flight 1549,232
TWA Flight 800,231
...,...
KLM Flight 592,1
1959 Air Charter Turkey crash,1
1912 Brooklands Flanders Monoplane crash,1
Aeroflot Flight 99,1


In [224]:
# for each event get the characteristics of its peaks in Flights_peak_features
# Flights_peak_features = pd.read_csv("Flights_peak_features.csv")
# Flights_peak_features.drop('Unnamed: 0', axis=1, inplace=True)
# Flights_peak_features.sort_values(by=['Timestamps','prominences'], ignore_index=True, inplace=True)
# Flights_peak_features['Timestamps'] = pd.to_datetime(Flights_peak_features['Timestamps'])
# df_peak_timestamps['Peak date'] =[d.date() for d in df_peak_timestamps['Timestamps']]
# Flights_peak_features.head(2)

# I- finding occurring events at the peaks 
- to justify collective memory 



In [81]:
# Match every peak with the events (from events_df) whose first-revision date
# (Main_Event_date) is the same calendar day as the peak.
s1 = df_peak_timestamps.merge(events_df, how='left', left_on='Peak date', right_on='Main_Event_date')
s1 = s1[s1['Main Event'].notna()]  # keep only peaks that found at least one match

peak_dates = pd.to_datetime(s1['Timestamps'])
inpact_df = pd.DataFrame({
    'Event': s1['event'],
    'Peak Date': peak_dates,
    'Peak Prominance': s1['prominences'],
    'Impacting Event': s1['Main Event'],
    'Impacting Event Category': s1['Category'],
    # page views of the impacting event on the day of the peak
    'Impacting Event View count': [
        data[str(dt)][ev] for dt, ev in zip(peak_dates, s1['Main Event'])
    ],
})
inpact_df

Unnamed: 0,Event,Peak Date,Peak Prominance,Impacting Event,Impacting Event Category,Impacting Event View count
0,American Airlines Flight 11,2015-07-19,40.214286,How to Make Love Like an Englishman,Movies,524.0
1,American Airlines Flight 11,2015-08-07,17.071429,2015 South Kivu earthquake,Earthquakes,0.0
9,American Airlines Flight 11,2016-02-07,12.285714,O'Hare CTA station train crash,train_accident,25.0
10,American Airlines Flight 11,2016-02-18,28.071429,1842 Cap-Haitien earthquake,Earthquakes,7.0
25,American Airlines Flight 11,2017-01-31,27.785714,David Sun,Billionaires,18.0
...,...,...,...,...,...,...
20735,United Express Flight 6291,2021-04-20,109.357143,Benavidez rail disaster,train_accident,14.0
20744,United Express Flight 6291,2021-10-12,10.000000,Xu Shaoyong,Billionaires,3.0
20745,United Express Flight 6291,2021-10-12,10.000000,1963 Aeroflot Tupolev Tu-124 Neva river ditching,air_accidents,20.0
20748,United Express Flight 6291,2021-12-06,55.071429,Himeanole,Movies,39.0


In [82]:
# Attach, per event, the number of matched impacting events: overall and per
# impacting category. Despite the "View count" wording these columns are counts of
# matched events, not page views.
# Index-aligned assignment (instead of merge + rename-last-column) makes the cell safe
# to re-run: existing columns are overwritten rather than appended a second time.
Impacting_event_OnPeak_df['All impacting View count'] = inpact_df['Event'].value_counts()

# A dedicated loop name avoids overwriting the notebook-level variable `cat`.
for impacting_cat in s1['Category'].dropna().unique():
    in_category = inpact_df['Impacting Event Category'] == impacting_cat
    Impacting_event_OnPeak_df[impacting_cat + " impacting View count"] = (
        inpact_df.loc[in_category, 'Event'].value_counts()
    )

# Events without any match have no entry in the value counts -> NaN -> 0.
Impacting_event_OnPeak_df = Impacting_event_OnPeak_df.fillna(0)
Impacting_event_OnPeak_df

Unnamed: 0_level_0,Peak count,All impacting View count,Movies impacting View count,Earthquakes impacting View count,train_accident impacting View count,Billionaires impacting View count,air_accidents impacting View count,Fictional_characters impacting View count,Actors impacting View count,Universities impacting View count,Volcano_eruptions impacting View count,Hurricanes impacting View count,Capitals impacting View count
Event,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Hindenburg disaster,305,42.0,8.0,6.0,10.0,3.0,7.0,1.0,1.0,2.0,4.0,0.0,0.0
Space Shuttle Challenger disaster,255,50.0,8.0,5.0,17.0,3.0,10.0,1.0,2.0,1.0,3.0,0.0,0.0
Pan Am Flight 103,237,40.0,2.0,5.0,8.0,5.0,13.0,1.0,1.0,2.0,2.0,1.0,0.0
US Airways Flight 1549,232,48.0,9.0,5.0,21.0,3.0,6.0,2.0,1.0,1.0,0.0,0.0,0.0
TWA Flight 800,231,39.0,6.0,6.0,7.0,4.0,5.0,7.0,1.0,0.0,3.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
KLM Flight 592,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1959 Air Charter Turkey crash,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1912 Brooklands Flanders Monoplane crash,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Aeroflot Flight 99,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [269]:
data.Category.value_counts()

air_accidents           1146
Earthquakes             1082
Fictional_characters     492
train_accident           477
Movies                   448
Actors                   417
Billionaires             326
Capitals                 203
Countries                197
Universities             143
Volcano_eruptions         31
Hurricanes                11
Name: Category, dtype: int64

In [272]:
# Average every percentage column over all events of the test category.
# Columns are selected by their 'Prc of ' prefix rather than by position, so the
# result does not depend on how many count columns precede them.
Impacting_event_OnPeak_df.filter(regex=r'^Prc of ').mean().to_frame()

Unnamed: 0,0
Prc of all View count,0.279796
Prc of Movies View count,0.168305
Prc of air_accidents View count,0.317705
Prc of Billionaires View count,0.22811
Prc of Earthquakes View count,0.507316
Prc of train_accident View count,0.492001
Prc of Universities View count,0.075796
Prc of Actors View count,0.061252
Prc of Fictional_characters View count,0.05996
Prc of Volcano_eruptions View count,0.656715


The question here is: for every peak in each event, are there other events that started at the same time as the peak?
- Answer: among the peaks of 430 events, 0.3% occurred at the same time as the first occurrence of one of 940 events.

In [274]:
inpact_df[inpact_df['Impacting Event Category']=='Movies'].groupby(['Event'],as_index=False).size()['size'].mean()

2.0253164556962027

In [275]:
# Average number of inpact_df rows per 'Event', i.e. how many impacting-event matches
# an event has on average across all of its peaks.
# NOTE(review): an earlier version of this comment quoted "9"; read the figure from
# the cell output instead of hard-coding it here.
inpact_df.groupby(['Event'],as_index=False).size()['size'].mean()

15.891280947255114

In [276]:
# Average number of inpact_df rows per 'Impacting Event', i.e. how many peak matches
# a single impacting event (via its first-revision date) has on average.
# NOTE(review): an earlier version of this comment quoted "19"; read the figure from
# the cell output instead of hard-coding it here.
inpact_df.groupby(['Impacting Event'],as_index=False).size()['size'].mean()

15.219587628865979

In [112]:
57*4

228

In [81]:
(142/224)*100

63.39285714285714

The question here is: for every peak in each event, are there anniversaries of other events that fall at the same time as the peak?

In [136]:
# Spot check: compare the 'Event date' of one peak row with that of one event row.
# NOTE(review): neither Flights_peak_features nor new_events is assigned in the visible
# part of this notebook (the only Flights_peak_features load is commented out), so this
# cell relies on leftover kernel state — confirm where they come from before a
# Restart & Run All.
Flights_peak_features['Event date'][23609]==new_events['Event date'][94]

True

In [102]:
new_events['Event date'][94]

datetime.date(2020, 1, 2)

In [105]:
Flights_peak_features['Timestamps'][23609]

Timestamp('2020-01-02 00:00:00')