In [1]:
import pandas as pd
from warnings import filterwarnings

# Suppress FutureWarning messages so they do not clutter cell outputs.
filterwarnings('ignore', category=FutureWarning)

def years_ago(arg: str, current_year: int = 2024) -> int:
    """Return how many years before ``current_year`` a year label lies.

    Parameters
    ----------
    arg:
        A year label such as ``'1206'`` or ``'2600 BC'``. A label containing
        ``'BC'`` is treated as a negative year. Non-string values (e.g. a
        plain ``int``) are converted with ``str`` first.
    current_year:
        The reference year to measure from. Defaults to 2024, the value the
        notebook was originally written against.

    Returns
    -------
    int
        ``current_year`` minus the signed year.

    Raises
    ------
    ValueError
        If what remains after removing ``'BC'`` is not an integer.

    Notes
    -----
    BC years are simply negated, so the span is computed as if a year 0
    existed (``'2600 BC'`` -> 4624 with the default reference year). This
    matches the notebook's existing outputs and is kept for compatibility.
    """
    text = str(arg).strip()
    is_bc = 'BC' in text
    # int() tolerates the whitespace left behind once 'BC' is removed.
    magnitude = int(text.replace('BC', ''))
    signed_year = -magnitude if is_bc else magnitude
    return current_year - signed_year

# Path of the input CSV inside the Kaggle environment.
WORLD = '/kaggle/input/world-important-events-ancient-to-modern/World Important Dates.csv'

# Read the CSV with its first column as the index, then discard the
# 'Date' and 'Month' columns.
df = pd.read_csv(WORLD, index_col=[0])
df = df.drop(columns=['Date', 'Month'])

# Derive each event's age from its 'Year' label.
df['years ago'] = df['Year'].apply(years_ago)

df.head()

Unnamed: 0_level_0,Name of Incident,Year,Country,Type of Event,Place Name,Impact,Affected Population,Important Person/Group Responsible,Outcome,years ago
Sl. No,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1,Indus Valley Civilization Flourishes,2600 BC,India,Civilization,Indus Valley,Development of one of the world's earliest urb...,Local inhabitants,Indus Valley people,Positive,4624
2,Battle of the Ten Kings,1400 BC,India,Battle,Punjab,Rigvedic tribes consolidated their control ove...,Rigvedic tribes,Sudas,Positive,3424
6,Establishment of the Delhi Sultanate,1206,India,Political,Delhi,Muslim rule established in parts of India,People of Delhi and surrounding regions,QutbUnknownudUnknowndin Aibak,Mixed,818
7,Battle of Panipat,1526,India,Battle,Panipat,Foundation of the Mughal Empire in India,Northern Indian kingdoms,Babur,Mixed,498
8,Establishment of British Raj,1858,India,Colonial,Whole India,Start of direct British governance in India,Indian subcontinent,British East India Company/Empire,Negative,166


In [2]:
# Summarize the frame: index range, per-column non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1096 entries, 1 to 1151
Data columns (total 10 columns):
 #   Column                              Non-Null Count  Dtype 
---  ------                              --------------  ----- 
 0   Name of Incident                    1096 non-null   object
 1   Year                                1096 non-null   object
 2   Country                             1096 non-null   object
 3   Type of Event                       1096 non-null   object
 4   Place Name                          1096 non-null   object
 5   Impact                              1096 non-null   object
 6   Affected Population                 1096 non-null   object
 7   Important Person/Group Responsible  1096 non-null   object
 8   Outcome                             1096 non-null   object
 9   years ago                           1096 non-null   int64 
dtypes: int64(1), object(9)
memory usage: 94.2+ KB


In [3]:
# Count the distinct values in each column.
df.nunique()

Name of Incident                      1030
Year                                   330
Country                                113
Type of Event                          402
Place Name                             523
Impact                                1081
Affected Population                    591
Important Person/Group Responsible     801
Outcome                                  4
years ago                              330
dtype: int64

In [4]:
from plotly.express import histogram

# Histogram of event ages, colored by outcome, with a log-scaled y axis.
histogram(
    data_frame=df,
    x='years ago',
    color='Outcome',
    log_y=True,
)

Almost all of our dataset consists of positive-outcome events from the last couple of hundred years.

In [5]:
# Histogram of the 'Outcome' column to show how the classes are balanced.
histogram(df, x='Outcome')

We have unbalanced classes and almost no ongoing events, so when we graph the data we can ignore ongoing events.

In [6]:
from plotly.express import scatter

# Leave out rows whose outcome is 'Ongoing' before plotting.
finished_events = df[df['Outcome'] != 'Ongoing']

# Extra columns to show in each point's hover tooltip.
hover_columns = [
    'Place Name',
    'Impact',
    'Affected Population',
    'Important Person/Group Responsible',
]

# One facet per outcome, stacked vertically (facet_col_wrap=1); points are
# positioned by event type and country and colored by age.
scatter(
    data_frame=finished_events,
    x='Type of Event',
    y='Country',
    color='years ago',
    facet_col='Outcome',
    facet_col_wrap=1,
    height=3*2400,
    hover_name='Name of Incident',
    hover_data=hover_columns,
)

This messy thing is as close as we're going to get to seeing all the data in a single graph; we lose older events of the same type in the same country to overplotting, because their markers land on exactly the same coordinates and hide one another. Graphing this kind of data sensibly in a single composite graph is hard.