In [10]:
# Step 1: Import necessary libraries
# pandas is used for data manipulation, numpy for numerical operations,
# matplotlib and seaborn for data visualization.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Step 2: Load the dataset
# 'storms.csv' holds one row per storm observation (name, date/time, position,
# status, category, wind, pressure and force diameters).
# Replace the path with the location of your dataset if necessary.
df = pd.read_csv('storms.csv')

# Step 3: Explore the dataset

# 3.1: Get a quick overview of the dataset
# DataFrame.info() writes its summary (number of entries, dtype and non-null
# count of each column) directly to stdout and returns None, so it is called
# on its own; wrapping it in print() would append a stray "None" line.
df.info()

# 3.2: Statistical summary of the dataset
# .describe() reports count, mean, standard deviation, min, quartiles and max
# for the numerical columns, which shows how the values are spread.
# NOTE(review): the displayed rows contain -999 / -1998 in the pressure and
# diameter columns; if these are missing-value placeholders they distort the
# statistics below -- confirm against the data source.
print(df.describe())

# 3.3: Check for missing values
# .isnull().sum() counts the null entries in each column, which identifies
# the columns that need attention before further analysis.
print(df.isnull().sum())





<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22705 entries, 0 to 22704
Data columns (total 13 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   name                          22705 non-null  object 
 1   year                          22705 non-null  int64  
 2   month                         22705 non-null  int64  
 3   day                           22705 non-null  int64  
 4   hour                          22705 non-null  int64  
 5   lat                           22705 non-null  float64
 6   long                          22705 non-null  float64
 7   status                        22705 non-null  object 
 8   category                      5162 non-null   float64
 9   wind                          22705 non-null  int64  
 10  pressure                      22705 non-null  int64  
 11  tropicalstorm_force_diameter  22705 non-null  int64  
 12  hurricane_force_diameter      22705 non-null  int64  
dtypes

# New Section

In [4]:
# Read the storms dataset from the working directory and show it as the cell
# output (pandas abbreviates the rendering of long frames).
df = pd.read_csv(filepath_or_buffer="storms.csv")
df

Unnamed: 0,name,year,month,day,hour,lat,long,status,category,wind,pressure,tropicalstorm_force_diameter,hurricane_force_diameter
0,AL011975,1975,6,24,12,32.5,-52.0,tropical depression,,20,-999,-1998,-1998
1,AL011975,1975,6,24,18,32.6,-52.6,tropical depression,,25,-999,-1998,-1998
2,AL011975,1975,6,25,0,32.7,-53.2,tropical depression,,25,-999,-1998,-1998
3,AL011975,1975,6,25,60,32.8,-53.2,tropical depression,,25,-999,-1998,-1998
4,AL011975,1975,6,25,12,33.0,-54.5,tropical depression,,25,-999,-1998,-1998
...,...,...,...,...,...,...,...,...,...,...,...,...,...
22700,Sara,2024,11,17,60,16.5,-87.5,tropical storm,,35,1001,120,0
22701,Sara,2024,11,17,12,16.8,-87.9,tropical storm,,35,1001,110,0
22702,Sara,2024,11,17,14,17.0,-88.3,tropical storm,,35,1001,110,0
22703,Sara,2024,11,17,18,17.4,-89.1,tropical depression,,30,1003,0,0


In [5]:
# Column overview: dtype and non-null count for every column.
print("\n--- Dataset Info ---")
# DataFrame.info() prints its report itself and returns None; calling it
# directly avoids the extra "None" line that print(df.info()) would emit.
df.info()


--- Dataset Info ---
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22705 entries, 0 to 22704
Data columns (total 13 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   name                          22705 non-null  object 
 1   year                          22705 non-null  int64  
 2   month                         22705 non-null  int64  
 3   day                           22705 non-null  int64  
 4   hour                          22705 non-null  int64  
 5   lat                           22705 non-null  float64
 6   long                          22705 non-null  float64
 7   status                        22705 non-null  object 
 8   category                      5162 non-null   float64
 9   wind                          22705 non-null  int64  
 10  pressure                      22705 non-null  int64  
 11  tropicalstorm_force_diameter  22705 non-null  int64  
 12  hurricane_force_diameter      22705 no

In [6]:
# Descriptive statistics (count, mean, std, min, quartiles, max) of the
# numeric columns, printed under a section banner.
# NOTE(review): rows shown earlier in the notebook carry -999 / -1998 in the
# pressure and diameter columns; if those are missing-value placeholders they
# skew these figures -- confirm.
print("\n--- Dataset Description ---", df.describe(), sep="\n")


--- Dataset Description ---
               year         month          day          hour           lat  \
count  22705.000000  22705.000000  22705.00000  22705.000000  22705.000000   
mean    2001.987932      8.689055     15.75098     22.528254     26.606276   
std       14.388083      1.353205      8.93909     22.556684     10.393664   
min     1975.000000      1.000000      1.00000      0.000000      7.000000   
25%     1990.000000      8.000000      8.00000     12.000000     18.000000   
50%     2003.000000      9.000000     16.00000     18.000000     26.000000   
75%     2015.000000      9.000000     24.00000     22.000000     33.400000   
max     2024.000000     12.000000     31.00000     94.000000     70.700000   

               long     category          wind      pressure  \
count  22705.000000  5162.000000  22705.000000  22705.000000   
mean     -61.275345     1.892871     48.069544    825.105792   
std       21.190347     1.149691     25.960262    554.391164   
min     -136

In [7]:
# Number of missing entries per column, printed under a section banner.
# isna() is the method that isnull() aliases, so the counts are the same.
print("\n--- Missing Values ---", df.isna().sum(), sep="\n")


--- Missing Values ---
name                                0
year                                0
month                               0
day                                 0
hour                                0
lat                                 0
long                                0
status                              0
category                        17543
wind                                0
pressure                            0
tropicalstorm_force_diameter        0
hurricane_force_diameter            0
dtype: int64


In [8]:
# Keep only the rows that have no missing value in any column; `df` itself is
# left unchanged. Per the missing-value counts printed earlier in the
# notebook, `category` is the only column containing NaNs, so this keeps
# exactly the rows that have a category.
data = df.dropna(axis=0, how="any")
data

Unnamed: 0,name,year,month,day,hour,lat,long,status,category,wind,pressure,tropicalstorm_force_diameter,hurricane_force_diameter
70,Blanche,1975,7,27,60,35.9,-70.0,hurricane,1.0,65,987,-1998,-1998
71,Blanche,1975,7,27,12,36.9,-69.0,hurricane,1.0,70,984,-1998,-1998
72,Blanche,1975,7,27,18,37.9,-68.0,hurricane,1.0,75,981,-1998,-1998
73,Blanche,1975,7,28,0,39.3,-67.2,hurricane,1.0,75,980,-1998,-1998
74,Blanche,1975,7,28,60,41.2,-66.4,hurricane,1.0,70,980,-1998,-1998
...,...,...,...,...,...,...,...,...,...,...,...,...,...
22674,Rafael,2024,11,8,0,24.6,-86.6,hurricane,2.0,90,966,180,45
22675,Rafael,2024,11,8,60,24.4,-87.6,hurricane,3.0,105,954,180,45
22676,Rafael,2024,11,8,12,24.4,-88.4,hurricane,2.0,95,962,150,45
22677,Rafael,2024,11,8,18,24.6,-89.1,hurricane,1.0,80,973,150,45
