In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily build the full path of every file found below the Kaggle input directory,
# then print the paths one per line in the order os.walk yields them.
input_files = (
    os.path.join(folder, file_name)
    for folder, _, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for input_path in input_files:
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/world-important-events-ancient-to-modern/World Important Dates.csv


In [2]:
import plotly.graph_objects as go
import plotly.express as px

In [3]:
# Read the events CSV from the Kaggle input directory and preview its first rows
events_csv = '/kaggle/input/world-important-events-ancient-to-modern/World Important Dates.csv'
df = pd.read_csv(events_csv)
df.head()

Unnamed: 0,Sl. No,Name of Incident,Date,Month,Year,Country,Type of Event,Place Name,Impact,Affected Population,Important Person/Group Responsible,Outcome
0,1,Indus Valley Civilization Flourishes,Unknown,Unknown,2600 BC,India,Civilization,Indus Valley,Development of one of the world's earliest urb...,Local inhabitants,Indus Valley people,Positive
1,2,Battle of the Ten Kings,Unknown,Unknown,1400 BC,India,Battle,Punjab,Rigvedic tribes consolidated their control ove...,Rigvedic tribes,Sudas,Positive
2,6,Establishment of the Delhi Sultanate,Unknown,Unknown,1206,India,Political,Delhi,Muslim rule established in parts of India,People of Delhi and surrounding regions,QutbUnknownudUnknowndin Aibak,Mixed
3,7,Battle of Panipat,21,April,1526,India,Battle,Panipat,Foundation of the Mughal Empire in India,Northern Indian kingdoms,Babur,Mixed
4,8,Establishment of British Raj,1,May,1858,India,Colonial,Whole India,Start of direct British governance in India,Indian subcontinent,British East India Company/Empire,Negative


In [4]:
# Display the dimensions of the loaded frame as (rows, columns)
df.shape

(1096, 12)

In [5]:
# 1. Line Chart

# Parse 'Year' as a number. Values that cannot be parsed (for example the
# '2600 BC' style entries visible in the data preview) are coerced to NaN and
# their rows are dropped here, so everything built from `df` after this point
# only covers rows with a plain numeric year.
df = (
    df.assign(Year=pd.to_numeric(df['Year'], errors='coerce'))
      .dropna(subset=['Year'])
)

# Map every year to its century: 1-100 -> 1, 101-200 -> 2, ...
df = df.assign(Century=(df['Year'].astype(int) - 1) // 100 + 1)

# One row per century with the number of events that fall into it
events_per_century = (
    df.groupby('Century')
      .size()
      .rename('Number of Events')
      .reset_index()
)

# Line plot with one tick per century on the x-axis
fig = px.line(
    events_per_century,
    x='Century',
    y='Number of Events',
    title='Number of Events Per Century',
)
fig.update_xaxes(title_text='Century', tickmode='linear')
fig.update_yaxes(title_text='Number of Events')
fig.show()

In [6]:
# 2. Bar

# Calendar order for the x-axis. Labels in 'Month' that are not month names
# (the data preview shows an 'Unknown' placeholder) are left out of this chart
# instead of turning into a NaN category.
months_order = ["January", "February", "March", "April", "May", "June",
                "July", "August", "September", "October", "November", "December"]

# Count events per month, then align the counts to the calendar: the reindex
# keeps the rows in chronological order, drops non-month labels and gives
# months without any event an explicit 0.
events_per_month = (
    df.groupby('Month')
      .size()
      .reindex(months_order, fill_value=0)
      .rename_axis('Month')
      .reset_index(name='Total Events')
)

# Create a bar graph; category_orders pins the calendar order on the axis
fig = px.bar(events_per_month, x='Month', y='Total Events', title='Total Events by Month',
             category_orders={'Month': months_order})
fig.update_layout(xaxis_title='Month', yaxis_title='Total Events')

# Keep the 0-100 view while it fits the data, but grow the axis (with a little
# headroom) when a month has more than 100 events so that no bar is cut off.
busiest_month = int(events_per_month['Total Events'].max())
y_upper = 100 if busiest_month <= 100 else busiest_month * 1.05
fig.update_yaxes(range=[0, y_upper])

fig.show()

In [7]:
# 3. Pie

# Tally how often each outcome label occurs: one row per label, with its count
outcome_counts = (
    df['Outcome']
    .value_counts()
    .rename_axis('Outcome')
    .reset_index(name='Count')
)

# One slice per outcome, sized by its count
fig = px.pie(
    outcome_counts,
    names='Outcome',
    values='Count',
    title='Distribution of Outcomes',
)

# Write the percentage and the label inside each slice
fig.update_traces(textinfo='percent+label', textposition='inside')
fig.show()

In [8]:
# 4. Box Plot

# Make sure 'Year' is numeric; values that cannot be parsed become NaN
df = df.assign(Year=pd.to_numeric(df['Year'], errors='coerce'))

# Single box summarising the distribution of event years
fig = px.box(data_frame=df, y='Year', title='Year Distribution')

fig.show()

In [9]:
# 5. Histogram

# Numeric years only: coerce unparseable values to NaN, drop those rows,
# then store the remaining years as integers.
df = (
    df.assign(Year=pd.to_numeric(df['Year'], errors='coerce'))
      .dropna(subset=['Year'])
      .astype({'Year': int})
)

# Histogram of event years; nbins can be tuned to the range of the data
fig = px.histogram(
    df,
    x='Year',
    nbins=50,
    labels={'Year': 'Year'},
    title='Distribution of Events by Year',
)

# Axis titles, plus a small gap between neighbouring bars
fig.update_xaxes(title_text='Year')
fig.update_yaxes(title_text='Count of Events')
fig.update_layout(bargap=0.2)

fig.show()

In [10]:
# 6. Violin Plot

# Violin of the event years with an embedded box plot; points="all" also
# draws every individual observation.
fig = px.violin(
    data_frame=df,
    y="Year",
    box=True,
    points="all",
    title="Distribution of Years for Important World Events",
)

fig.show()

In [11]:
# 7. Scatter Plot

# One row per year with the number of events recorded for that year
events_per_year = (
    df.groupby('Year')
      .size()
      .rename('Events Count')
      .reset_index()
)

# Scatter of yearly event counts; `labels` renames the count axis for display
fig = px.scatter(
    events_per_year,
    x='Year',
    y='Events Count',
    labels={'Year': 'Year', 'Events Count': 'Number of Events'},
    title='Number of Important World Events per Year',
)

fig.show()

In [12]:
# 8. Heatmap

# Small synthetic table used to demonstrate a correlation heatmap.
# NOTE: this rebinds `df`, so the events dataset is no longer reachable under
# that name after this cell runs.
data = dict(
    A=[1, 2, 3, 4, 5],
    B=[5, 4, 3, 2, 1],
    C=[2, 3, 4, 5, 6],
    D=[5, 3, 2, 4, 1],
)
df = pd.DataFrame(data)

# Column-by-column correlation matrix of the demo table
corr_matrix = df.corr()

# Draw the matrix as an image, printing each coefficient inside its cell
fig = px.imshow(
    corr_matrix,
    x=corr_matrix.columns,
    y=corr_matrix.columns,
    labels=dict(x="Feature", y="Feature", color="Correlation"),
    text_auto=True,
    title="Heatmap of Feature Correlation",
)

fig.show()

In [13]:
# 9. Bubble chart

# Synthetic demo data: one bubble per category (this rebinds `df`)
data = dict(
    Category=['A', 'B', 'C', 'D', 'E'],
    Value1=[10, 20, 30, 40, 50],
    Value2=[100, 85, 60, 30, 10],
    Size=[1, 2, 3, 4, 5],
)
df = pd.DataFrame(data)

# A scatter plot becomes a bubble chart once `size` is mapped to a column;
# size_max caps the size of the largest bubble.
fig = px.scatter(
    df,
    x='Value1',
    y='Value2',
    color='Category',
    size='Size',
    size_max=60,
    hover_name='Category',
    title='Bubble Chart Example',
)

fig.show()





In [14]:
# 10. Scatter plot on map

# Five named locations with their latitude / longitude (this rebinds `df`)
data = dict(
    Name=['New York City', 'Los Angeles', 'London', 'Tokyo', 'Sydney'],
    Latitude=[40.7128, 34.0522, 51.5074, 35.6895, -33.8688],
    Longitude=[-74.0060, -118.2437, -0.1278, 139.6917, 151.2093],
)
df = pd.DataFrame(data)

# One marker per location; hovering a marker shows the location name
fig = px.scatter_geo(
    df,
    lat='Latitude',
    lon='Longitude',
    hover_name='Name',
    title='Scatter Plot on Map with 5 Locations',
)

fig.show()

In [15]:
# 11. Bubble map

# Same five locations as the map scatter, rebuilt here with an extra 'Size'
# column holding an example bubble size for each location (this rebinds `df`).
data = dict(
    Name=['New York City', 'Los Angeles', 'London', 'Tokyo', 'Sydney'],
    Latitude=[40.7128, 34.0522, 51.5074, 35.6895, -33.8688],
    Longitude=[-74.0060, -118.2437, -0.1278, 139.6917, 151.2093],
    Size=[20, 15, 25, 30, 10],
)
df = pd.DataFrame(data)

# Markers are scaled by 'Size' (size_max caps the largest bubble);
# hovering a bubble shows the location name.
fig = px.scatter_geo(
    df,
    lat='Latitude',
    lon='Longitude',
    size='Size',
    size_max=50,
    hover_name='Name',
    title='Bubble Map with 5 Locations',
)

fig.show()

In [16]:
# 12. Tree map

# Two-level demo hierarchy: each subcategory belongs to a category and carries
# a value (this rebinds `df`).
data = dict(
    Category=['A', 'A', 'A', 'B', 'B', 'B', 'C', 'C'],
    Subcategory=['A1', 'A2', 'A3', 'B1', 'B2', 'B3', 'C1', 'C2'],
    Value=[10, 15, 20, 15, 25, 30, 40, 50],
)
df = pd.DataFrame(data)

# `path` lists the hierarchy levels from outermost to innermost
fig = px.treemap(
    df,
    path=['Category', 'Subcategory'],
    values='Value',
    title='Treemap Example',
)
fig.show()

In [17]:
# 13. Sunburst

# Two-level demo hierarchy: each subcategory belongs to a category and carries
# a value (this rebinds `df`).
data = dict(
    Category=['A', 'A', 'A', 'B', 'B', 'B', 'C', 'C'],
    Subcategory=['A1', 'A2', 'A3', 'B1', 'B2', 'B3', 'C1', 'C2'],
    Value=[10, 15, 20, 15, 25, 30, 40, 50],
)
df = pd.DataFrame(data)

# `path` lists the hierarchy levels from the centre outwards
fig = px.sunburst(
    df,
    path=['Category', 'Subcategory'],
    values='Value',
    title='Sunburst Example',
)
fig.show()

In [18]:
# 14. Sankey diagram

# Node labels. Links refer to nodes by their position in this list, and the
# links below use indices 0-5, so six labels are required.
node_labels = ['Node 1', 'Node 2', 'Node 3', 'Node 4', 'Node 5', 'Node 6']

# Links between nodes: the i-th link goes from link_source[i] to
# link_target[i] and carries link_value[i].
link_source = [0, 1, 0, 2, 3, 3]
link_target = [2, 3, 3, 4, 4, 5]
link_value = [8, 4, 2, 8, 4, 2]

# Guard against a link pointing at a node that has no label, and against
# the three link lists drifting out of step.
assert len(link_source) == len(link_target) == len(link_value)
assert max(link_source + link_target) < len(node_labels), "link index without a node label"

# Create the Sankey diagram
fig = go.Figure(data=[go.Sankey(
    node=dict(
      pad=15,
      thickness=20,
      line=dict(color="black", width=0.5),
      label=node_labels
    ),
    link=dict(
      source=link_source,
      target=link_target,
      value=link_value
    ))])

fig.update_layout(title_text='Sankey Diagram Example')
fig.show()

In [19]:
# 15. Radar Chart

# One score per category for a single product
categories = ['Performance', 'Design', 'Safety', 'Technology', 'Comfort']
values = [80, 70, 90, 85, 75]

# A polar scatter trace filled back onto itself draws the radar polygon
radar_trace = go.Scatterpolar(
    r=values,
    theta=categories,
    fill='toself',
    name='Product A',
)
fig = go.Figure(data=[radar_trace])

# Show the radial axis with a fixed 0-100 range; hide the legend (single trace)
fig.update_layout(
    title="Radar Chart Example",
    showlegend=False,
    polar_radialaxis=dict(visible=True, range=[0, 100]),
)

fig.show()

In [20]:
# 16. Gantt Chart

import datetime

# Task data: 'Start' / 'Finish' are ISO dates (YYYY-MM-DD); 'Resource' is
# written on the bar of the task it belongs to.
tasks = [
    dict(Task="Task 1", Start='2023-01-01', Finish='2023-02-01', Resource='Development'),
    dict(Task="Task 2", Start='2023-02-15', Finish='2023-03-15', Resource='Design'),
    dict(Task="Task 3", Start='2023-03-20', Finish='2023-04-30', Resource='Testing')
]

date_format = '%Y-%m-%d'

# Create figure
fig = go.Figure()

for task in tasks:
    start = datetime.datetime.strptime(task['Start'], date_format)
    finish = datetime.datetime.strptime(task['Finish'], date_format)
    fig.add_trace(go.Bar(
        # Horizontal bar: x is the bar length and y the row it is drawn on.
        # On a date axis a numeric length is read as milliseconds, so the
        # duration is converted from a timedelta to milliseconds.
        x=[(finish - start).total_seconds() * 1000],
        y=[task['Task']],
        # The bar starts at the task's start date and extends by its duration
        base=[start],
        orientation='h',
        name=task['Task'],
        text=[task['Resource']]
    ))

# Update layout
fig.update_layout(
    title='Gantt Chart Example',
    xaxis=dict(type='date'),
    # Reversed so the first task is listed at the top
    yaxis=dict(title='Tasks', autorange='reversed'),
    # Every task sits on its own row, so the bars are simply overlaid
    barmode='overlay'
)

fig.show()