In [1]:
import pandas as pd

# Read the complete CSV into a DataFrame, then show the frame followed by
# its Python type (one item per output line).
battles = pd.read_csv(filepath_or_buffer="revolutionary_war.csv")
print(battles, type(battles), sep="\n")


                                Battle  Start Date          State
0                         Powder Alarm    9/1/1774  Massachusetts
1    Storming of Fort William and Mary  12/14/1774  New Hampshire
2     Battles of Lexington and Concord   4/19/1775  Massachusetts
3                      Siege of Boston   4/19/1775  Massachusetts
4                   Gunpowder Incident   4/20/1775       Virginia
..                                 ...         ...            ...
227                Siege of Fort Henry   9/11/1782       Virginia
228         Grand Assault on Gibraltar   9/13/1782            NaN
229          Action of 18 October 1782  10/18/1782            NaN
230          Action of 6 December 1782   12/6/1782            NaN
231          Action of 22 January 1783   1/22/1783       Virginia

[232 rows x 3 columns]
<class 'pandas.core.frame.DataFrame'>


In [2]:
# Re-read the CSV keeping only the two columns needed, parse the start
# dates as datetimes, index the rows by that date, and keep the "State"
# column so the result is a Series of states keyed by battle start date.
battles = pd.read_csv(
    "revolutionary_war.csv",
    usecols=["Start Date", "State"],
    parse_dates=["Start Date"],
).set_index("Start Date")["State"]

# Show the Series, its Python type, and the dtype of its date index.
print(battles, type(battles), battles.index.dtype, sep="\n")


Start Date
1774-09-01    Massachusetts
1774-12-14    New Hampshire
1775-04-19    Massachusetts
1775-04-19    Massachusetts
1775-04-20         Virginia
                  ...      
1782-09-11         Virginia
1782-09-13              NaN
1782-10-18              NaN
1782-12-06              NaN
1783-01-22         Virginia
Name: State, Length: 232, dtype: object
<class 'pandas.core.series.Series'>
datetime64[ns]


In [3]:
# value_counts() sorts by frequency (descending), so the first three
# entries are the states with the most battles; missing states are ignored.
top_states = battles.value_counts(dropna=True).iloc[:3]
print("Top 3 states with the highest number of battles:", top_states, sep="\n")


Top 3 states with the highest number of battles:
State
South Carolina    31
New York          28
New Jersey        24
Name: count, dtype: int64


In [4]:
# Every row counts as one battle, whether or not its state is known.
total_battles = len(battles)

# Per-state tallies in alphabetical order, keeping a separate NaN entry for
# battles with no recorded state.
state_counts = battles.value_counts(dropna=False).sort_index()

# The three most frequent known states (value_counts drops NaN by default).
top_three_states = battles.value_counts().iloc[:3]

# Fraction of ALL battles (unknown-state rows included in the denominator)
# that took place in the top three states.
top_share = top_three_states.sum() / total_battles

print(
    "Battle counts by state (including unknown states):",
    state_counts,
    f"Total battles (including NA): {total_battles}",
    "Top 3 states battle counts:",
    top_three_states,
    f"Share of battles covered by the top 3 states: {top_share:.2%}",
    sep="\n",
)


Battle counts by state (including unknown states):
State
Connecticut        3
Delaware           1
Florida            8
Georgia            6
Indiana            1
Louisiana          1
Massachusetts     11
New Hampshire      1
New Jersey        24
New York          28
North Carolina     9
Ohio               1
Pennsylvania      10
Rhode Island       3
South Carolina    31
Vermont            3
Virginia          21
NaN               70
Name: count, dtype: int64
Total battles (including NA): 232
Top 3 states battle counts:
State
South Carolina    31
New York          28
New Jersey        24
Name: count, dtype: int64
Share of battles covered by the top 3 states: 35.78%


In [5]:
# Split the full span of battle start dates into three equal-length periods
# and count how many battles began in each one.
date_index = battles.index

# Four evenly spaced edges delimit three periods. min()/max() ignore any
# missing (NaT) dates in the index.
period_edges = pd.date_range(start=date_index.min(), end=date_index.max(), periods=4)

# Human-readable "start - end" labels. Only the calendar date of each edge is
# shown; any time-of-day component on an edge is dropped by .date().
period_labels = [
    f"{start.date()} - {end.date()}"
    for start, end in zip(period_edges[:-1], period_edges[1:])
]

# Assign each start date to its period. Intervals are closed on the right;
# include_lowest=True also admits dates equal to the very first edge, so the
# earliest battle is not left without a bin.
period_bins = pd.cut(
    battles.index,
    bins=period_edges,
    labels=period_labels,
    include_lowest=True,
    right=True,
)

# observed=False is passed explicitly: it keeps periods with zero battles in
# the result and avoids the FutureWarning pandas raises when a categorical
# grouper is used without stating `observed`. Rows whose start date is missing
# get no bin and are therefore left out of these counts, so the per-period
# totals can sum to less than len(battles).
# reindex() pins the rows to chronological label order.
battles_per_period = (
    battles.groupby(period_bins, observed=False).size().reindex(period_labels)
)
most_active_period = battles_per_period.idxmax()

print("Battles grouped by period (chronological order):")
print(battles_per_period)
print(f"Most intense period: {most_active_period}")


Battles grouped by period (chronological order):
1774-09-01 - 1777-06-18     47
1777-06-18 - 1780-04-05     81
1780-04-05 - 1783-01-22    100
Name: State, dtype: int64
Most intense period: 1780-04-05 - 1783-01-22


  battles_per_period = battles.groupby(period_bins).size().reindex(period_labels)
