# Container Discharge Date Analysis
Analysis of container discharge dates from Results_apm_all.csv

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

# Configure how pandas renders DataFrames: no limit on the number of columns
# shown (None), and at most 50 rows before the output is truncated.
pd.options.display.max_columns = None
pd.options.display.max_rows = 50

In [None]:
# Load Results_apm_all.csv into a DataFrame. The location is an absolute,
# machine-specific Windows path; the raw-string prefix keeps its backslashes
# from being interpreted as escape sequences.
results_csv_path = r'C:\Users\k_pow\OneDrive\Documents\Capstone\Webscraping\Results_apm_all.csv'
df = pd.read_csv(results_csv_path)

In [None]:
# Tally the rows whose 'Discharge Date' is missing and report that count
# together with its share of all rows in df.
missing_discharge_mask = df['Discharge Date'].isna()
not_discharged = missing_discharge_mask.sum()
not_discharged_pct = not_discharged / len(df) * 100
print(f"Number of containers not yet discharged: {not_discharged}")
print(f"Percentage of containers not discharged: {not_discharged_pct:.2f}%")

In [None]:
# Parse 'Discharge Date' into pandas datetimes. With errors='coerce', any
# value that cannot be parsed becomes NaT instead of raising.
parsed_discharge_dates = pd.to_datetime(df['Discharge Date'], errors='coerce')
df['Discharge Date'] = parsed_discharge_dates

# Tally rows per distinct parsed value (value_counts leaves out missing values
# by default), then reorder the tally chronologically by its index.
# NOTE(review): the tally groups by exact timestamp; if the column carries a
# time of day, rows from the same calendar day land in separate buckets --
# confirm against the CSV.
counts_by_frequency = df['Discharge Date'].value_counts()
discharge_counts = counts_by_frequency.sort_index()

print("\nNumber of containers discharged by date:", discharge_counts, sep="\n")

In [None]:
# Draw one bar per entry of discharge_counts on a 15x6-inch figure, using the
# explicit Figure/Axes interface.
fig, ax = plt.subplots(figsize=(15, 6))
discharge_counts.plot(kind='bar', ax=ax)
ax.set_title('Container Discharges by Date')
ax.set_xlabel('Date')
ax.set_ylabel('Number of Containers')
# Tilt the x tick labels by 45 degrees.
plt.setp(ax.get_xticklabels(), rotation=45)
# Re-fit the layout to the figure after the labels have been rotated.
fig.tight_layout()
plt.show()

In [None]:
# Summarize the discharge dates: the earliest and latest values in the
# 'Discharge Date' column, followed by the dates with the highest counts.
print("Discharge Date Statistics:")
print(f"Earliest discharge date: {df['Discharge Date'].min()}")
print(f"Latest discharge date: {df['Discharge Date'].max()}")
print("\nMost common discharge dates:")
# discharge_counts was ordered chronologically with sort_index(), so head()
# would list the five earliest dates rather than the most frequent ones.
# nlargest(5) selects the five highest counts, in descending order.
print(discharge_counts.nlargest(5))