In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found,
# in the order os.walk yields them.
input_file_paths = (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import base64
import io

import geopandas as gpd
import matplotlib.animation as animation
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from IPython.display import HTML
# Read the three input tables (populations, country/area lookup, battles)
# from the attached Kaggle dataset, in that order.
populations, countries, battles = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/battle/populations_clean.csv',
        '/kaggle/input/battle/countries_areas.csv',
        '/kaggle/input/battle/battles_clean.csv',
    )
)

In [None]:
# Number of battles per (area, year); each row of `battles` is counted as one battle.
battle_count = battles.groupby(['area', 'year']).size().reset_index(name='battle_count')

# Attach the per-(area, year) count to every battle row.
# Any `battle_count` column left over from a previous run of this cell is dropped
# first; otherwise a re-run would produce `battle_count_x` / `battle_count_y`
# and the `merged_data['battle_count']` lookup below would raise KeyError.
battles = pd.merge(
    battles.drop(columns='battle_count', errors='ignore'),
    battle_count,
    on=['area', 'year'],
    how='left',
)

# Left-join so that every population row is kept even when no battle matches.
# NOTE(review): an (area, year) with several battles yields one output row per
# battle, so the population value is repeated on each of them — confirm this
# is intended for the map below.
merged_data = pd.merge(populations, battles, on=['area', 'year'], how='left')
merged_data = pd.merge(merged_data, countries, on='area', how='left')

# Population rows without any battle get a count of 0. Assign the result back
# instead of `fillna(..., inplace=True)` on a column selection, which is
# chained assignment and does not update the frame under pandas Copy-on-Write.
merged_data['battle_count'] = merged_data['battle_count'].fillna(0)

# Load the world map using GeoPandas.
# The bundled `gpd.datasets` accessor was removed in GeoPandas 1.0 and raises
# AttributeError there, so fall back to the public Natural Earth 1:110m
# countries archive. Only the geometry is used (the map is drawn in a single
# flat colour), so the differing attribute columns do not matter.
NATURAL_EARTH_COUNTRIES_URL = (
    'https://naciscdn.org/naturalearth/110m/cultural/ne_110m_admin_0_countries.zip'
)
try:
    world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
except AttributeError:
    # Requires internet access to be enabled for the notebook session.
    world = gpd.read_file(NATURAL_EARTH_COUNTRIES_URL)

# Figure and axes shared by every animation frame.
fig, ax = plt.subplots(figsize=(10, 6))

# Marker-area scale factors (points^2 per person / per battle).
POPULATION_MARKER_SCALE = 0.00001
BATTLE_MARKER_SCALE = 20


def animate(year):
    """Draw one animation frame: the world map plus markers for ``year``.

    Clears the shared axes, redraws the base map, then plots every row of
    ``merged_data`` for ``year`` that has both coordinates: a blue circle
    sized by ``pop_estimate`` and, where ``battle_count`` is positive, a red
    circle sized by ``battle_count``.

    Parameters
    ----------
    year
        Value matched against ``merged_data['year']``.
    """
    ax.clear()
    ax.set_title(f'Population and Battles Over Time\nYear: {year}')

    # Base layer: country outlines in a neutral colour.
    world.plot(ax=ax, color='lightgray')

    # Rows for this frame that can actually be placed on the map.
    year_data = merged_data[merged_data['year'] == year]
    located = year_data.dropna(subset=['latitude', 'longitude'])

    # One scatter call per layer instead of one per row: same markers, but a
    # single artist per layer, which keeps frame rendering fast.
    ax.scatter(
        located['longitude'],
        located['latitude'],
        s=located['pop_estimate'] * POPULATION_MARKER_SCALE,
        color='b',
        alpha=0.5,
        label='Population',
    )

    with_battles = located[located['battle_count'] > 0]
    ax.scatter(
        with_battles['longitude'],
        with_battles['latitude'],
        s=with_battles['battle_count'] * BATTLE_MARKER_SCALE,
        color='r',
        alpha=0.7,
        label='Battle Count',
    )

# One frame per distinct year, in chronological order (missing years skipped
# so that sorting is well defined).
animation_years = sorted(merged_data['year'].dropna().unique())
ani = animation.FuncAnimation(fig, animate, frames=animation_years, interval=1500)

# Save animation as a GIF
GIF_PATH = 'population_battles_animation.gif'
ani.save(GIF_PATH, writer='imagemagick', fps=1)

# The animation is already on disk; close the figure so the notebook does not
# also render a static copy of the last frame below the cell.
plt.close(fig)

# Display the GIF.
# The image bytes are embedded as a base64 data URI so the output renders
# wherever the notebook is viewed, instead of relying on the front-end being
# able to resolve a relative file path.
with open(GIF_PATH, 'rb') as gif_file:
    gif_base64 = base64.b64encode(gif_file.read()).decode('ascii')

HTML(f'<img src="data:image/gif;base64,{gif_base64}" alt="Population and battles animation" />')

In [None]:
import warnings
# Suppresses every warning for the rest of the session, including pandas and
# seaborn deprecation notices.
warnings.filterwarnings('ignore')


# Attach the region of each battle's area; battles whose area has no match in
# `countries` get a missing region and are left out by the groupby below.
battles_with_region = pd.merge(battles, countries[['area', 'region']], on='area', how='left')

# Count battles per region and year by counting rows (one row = one battle).
# Summing a per-row `battle_count` column would be wrong here: that column
# repeats the size of a group on every row of the group, so its sum is the
# sum of squared group sizes rather than the number of battles.
battle_by_region_year = (
    battles_with_region
    .groupby(['region', 'year'])
    .size()
    .reset_index(name='battle_count')
)

# Aggregate total battles by region
total_battles_by_region = battle_by_region_year.groupby('region')['battle_count'].sum().reset_index()

# Dedicated figure/axes names so the animation's `fig` / `ax` are not overwritten.
region_fig, region_ax = plt.subplots(figsize=(14, 8))
sns.barplot(data=total_battles_by_region, x='region', y='battle_count', palette='viridis', ax=region_ax)

# Adding labels and title
region_ax.set_title('Total Battles by Region')
region_ax.set_xlabel('Region')
region_ax.set_ylabel('Total Number of Battles')
region_ax.tick_params(axis='x', rotation=90)  # Rotate x-axis labels for better readability

plt.show()


In [None]:
# Merge battles with countries to get region information
battles_with_region = pd.merge(battles, countries[['area', 'region']], on='area', how='left')

# Battles per (region, year), counting one row of `battles` as one battle.
# Counting at region level directly avoids joining area-level counts to regions
# on `year` alone, which pairs every area's count with every region that had a
# battle in that year.
battle_counts_with_region = (
    battles_with_region
    .groupby(['region', 'year'])
    .size()
    .reset_index(name='battle_count')
)

# Population per (region, year): sum of the area-level estimates.
# NOTE(review): assumes `populations` has one row per (area, year) and that the
# set of areas reported for a region is stable across years — confirm, since
# gaps would show up as artificial jumps in growth.
populations_with_region = pd.merge(populations, countries[['area', 'region']], on='area', how='left')
region_population = (
    populations_with_region
    .groupby(['region', 'year'], as_index=False)['pop_estimate']
    .sum()
)

# One row per (region, year); ordered by year within region so that
# pct_change below compares consecutive years of the same region.
merged_data = (
    region_population
    .merge(battle_counts_with_region, on=['region', 'year'], how='left')
    .sort_values(['region', 'year'])
    .reset_index(drop=True)
)

# Fill missing battle counts with 0 (assigned back rather than chained inplace).
merged_data['battle_count'] = merged_data['battle_count'].fillna(0)

# Calculate population growth rate (% change from the previous recorded year)
merged_data['pop_growth'] = merged_data.groupby('region')['pop_estimate'].pct_change() * 100

# Drop each region's first year, which has no previous value to compare with
merged_data = merged_data.dropna(subset=['pop_growth'])

# Create scatter plot with regression line
growth_fig, growth_ax = plt.subplots(figsize=(12, 8))
sns.regplot(
    data=merged_data,
    x='pop_growth',
    y='battle_count',
    scatter_kws={'s': 50},
    line_kws={'color': 'red'},
    ax=growth_ax,
)
growth_ax.set_title('Relationship Between Population Growth and Battle Frequency')
growth_ax.set_xlabel('Population Growth Rate (%)')
growth_ax.set_ylabel('Battle Count')
growth_ax.grid(True)
plt.show()

In [None]:
intense_battle_threshold = 50

# Mark years with intense battles.
# `pop_growth` is reused as already present in `merged_data`: recomputing it
# on a frame whose NaN-growth rows were already dropped would turn each
# region's first remaining year into NaN and lose one more bar per region.
merged_data = merged_data.assign(
    intense_battle=merged_data['battle_count'] > intense_battle_threshold
)

# Fix the category order once so every facet places the same year at the same
# x position and dodges the two hue levels identically.
year_order = sorted(merged_data['year'].unique())
intensity_order = [False, True]

# Step 1: Create a grid of bar plots for each region
g = sns.FacetGrid(merged_data, col="region", col_wrap=3, height=4, aspect=1.5, sharey=False)

# Step 2: Plot population growth with color indicating battle intensity
g.map_dataframe(
    sns.barplot,
    x='year',
    y='pop_growth',
    hue='intense_battle',
    order=year_order,
    hue_order=intensity_order,
    palette={True: 'red', False: 'blue'},
)

# Add labels and titles
g.set_titles("{col_name}")
g.set_axis_labels("Year", "Population Growth Rate (%)")

# Adjust the legend and layout; leave room at the top for the overall title
g.add_legend(title="Intense Battle")
plt.subplots_adjust(top=0.9)
g.fig.suptitle('Population Growth Before and After Intense Battles for Each Region')

# Show the plot
plt.show()

In [None]:
region_of_interest = 'Europe'
intense_battle_threshold = 50  # Define a threshold for intense battles

# Filter data for the selected region and derive the plotted columns on a new
# frame. `.loc` + `.assign` avoids writing columns into a filtered slice of
# `merged_data` (the SettingWithCopy pattern).
#   pop_growth     - % change of pop_estimate from the previous row
#   intense_battle - True where battle_count exceeds the threshold
region_data = (
    merged_data
    .loc[merged_data['region'] == region_of_interest]
    .assign(
        pop_growth=lambda frame: frame['pop_estimate'].pct_change() * 100,
        intense_battle=lambda frame: frame['battle_count'] > intense_battle_threshold,
    )
)

# Bar plot for population growth before and after intense battles.
# Dedicated figure/axes names so the animation's `fig` / `ax` are not overwritten.
europe_fig, europe_ax = plt.subplots(figsize=(12, 6))
sns.barplot(
    data=region_data,
    x='year',
    y='pop_growth',
    hue='intense_battle',
    hue_order=[False, True],  # stable colour/legend order even if one level is absent
    palette={True: 'red', False: 'blue'},
    ax=europe_ax,
)

# Adding labels and title
europe_ax.set_title(f'Population Growth in {region_of_interest} Before and After Intense Battles')
europe_ax.set_xlabel('Year')
europe_ax.set_ylabel('Population Growth Rate (%)')

# Show the plot
europe_fig.tight_layout()
plt.show()