In [36]:
import time
import numpy as np
import pandas as pd
import unittest


In [29]:
# Sample trips: two rides starting at 09:07 and one starting at 00:07, all in January 2017.
data = {
    'Start Time': ['2017-01-01 09:07:57', '2017-01-02 09:07:57', '2017-01-03 00:07:57'],
    'End Time': ['2017-01-01 09:20:53', '2017-01-02 09:20:53', '2017-01-03 00:20:53'],
}
df = pd.DataFrame(data)

# Parse both timestamp columns; values that cannot be parsed become NaT instead of raising.
for time_col in ('Start Time', 'End Time'):
    df[time_col] = pd.to_datetime(df[time_col], errors='coerce')

# Derive month name, weekday name and hour from the trip start.
start_dt = df['Start Time'].dt
df['month'] = start_dt.strftime('%B')
df['day_of_week'] = start_dt.day_name()
df['hour'] = start_dt.hour.astype('Int64')  # nullable integer dtype

separator = "-" * 40
print(df)
print(separator)

print("\nCalculating The Most Frequent Times of Travel...\n")

# mode() returns the most frequent value(s) in sorted order, so [0] picks the
# first of them when several values are tied.
most_common_month = df['month'].mode()[0]
most_common_day = df['day_of_week'].mode()[0]
most_common_hour = df['hour'].mode()[0]

# Report: month, then day of week, then start hour.
print(f"The most common month is: {most_common_month}")
print(f"The most common day of the week is: {most_common_day}")
print(f"The most common start hour is: {most_common_hour}")

print(separator)

           Start Time            End Time    month day_of_week  hour
0 2017-01-01 09:07:57 2017-01-01 09:20:53  January      Sunday     9
1 2017-01-02 09:07:57 2017-01-02 09:20:53  January      Monday     9
2 2017-01-03 00:07:57 2017-01-03 00:20:53  January     Tuesday     0
----------------------------------------

Calculating The Most Frequent Times of Travel...

The most common month is: January
The most common day of the week is: Monday
The most common start hour is: 9
----------------------------------------


In [30]:
# Sample trips: Station A -> Station B occurs twice, Station B -> Station C once.
data = {
    'Start Station': ['Station A', 'Station B', 'Station A'],
    'End Station': ['Station B', 'Station C', 'Station B'],
}
df = pd.DataFrame(data)
print(df)
print("-" * 40)

# Most frequent value of each station column (first of the modes).
most_common_start_station = df['Start Station'].mode()[0]
most_common_end_station = df['End Station'].mode()[0]
print(f"The most commonly used start station is: {most_common_start_station}")
print(f"The most commonly used end station is: {most_common_end_station}")

# Count rows per (start, end) pair; idxmax() yields the index label of the
# largest count, which for this two-level index is a (start, end) tuple.
trip_counts = df.groupby(['Start Station', 'End Station']).size()
most_common_trip = trip_counts.idxmax()
print(f"The most frequent combination of start station and end station trip is: {most_common_trip[0]} to {most_common_trip[1]}")

print("-" * 40)

  Start Station End Station
0     Station A   Station B
1     Station B   Station C
2     Station A   Station B
----------------------------------------
The most commonly used start station is: Station A
The most commonly used end station is: Station B
The most frequent combination of start station and end station trip is: Station A to Station B
----------------------------------------


In [31]:
# Three sample trip durations (reported below in seconds).
data = {'Trip Duration': [300, 200, 150]}
df = pd.DataFrame(data)
print("-" * 40)

durations = df['Trip Duration']

# Sum of all trip durations.
total_travel_time = durations.sum()
print(f"Total travel time: {total_travel_time} seconds")

# Average trip duration, rounded to two decimal places.
mean_travel_time = round(durations.mean(), 2)
print(f"Mean travel time: {mean_travel_time} seconds")

print("-" * 40)

----------------------------------------
Total travel time: 650 seconds
Mean travel time: 216.67 seconds
----------------------------------------


In [32]:
# Sample riders: two male subscribers born in 1985 and one female customer born in 1992.
data = {
    'User Type': ['Subscriber', 'Customer', 'Subscriber'],
    'Gender': ['Male', 'Female', 'Male'],
    'Birth Year': [1985, 1992, 1985],
}
df = pd.DataFrame(data)

print(df)
print("-" * 40)

# Frequency table of each categorical column.
user_types = df['User Type'].value_counts()
gender_counts = df['Gender'].value_counts()
print(f"Counts of user types:\n{user_types}")
print(f"\nCounts of gender:\n{gender_counts}")

# Birth-year summary, converted to plain ints so the years print without decimals.
birth_years = df['Birth Year']
earliest_year = int(birth_years.min())
most_recent_year = int(birth_years.max())
most_common_year = int(birth_years.mode()[0])
print(f"\nEarliest year of birth: {earliest_year}")
print(f"Most recent year of birth: {most_recent_year}")
print(f"Most common year of birth: {most_common_year}")

print("-" * 40)

    User Type  Gender  Birth Year
0  Subscriber    Male        1985
1    Customer  Female        1992
2  Subscriber    Male        1985
----------------------------------------
Counts of user types:
Subscriber    2
Customer      1
Name: User Type, dtype: int64

Counts of gender:
Male      2
Female    1
Name: Gender, dtype: int64

Earliest year of birth: 1985
Most recent year of birth: 1992
Most common year of birth: 1985
----------------------------------------


In [35]:
# Same time statistics as before, but on sample data with gaps: the third trip
# has no start time and the second trip has no end time.
data = {
    'Start Time': ['2017-01-01 09:07:57', '2017-01-02 09:07:57', None],
    'End Time': ['2017-01-01 09:20:53', None, '2017-01-03 00:20:53'],
}
df = pd.DataFrame(data)

# Parse both timestamp columns; missing or unparseable values become NaT.
for time_col in ('Start Time', 'End Time'):
    df[time_col] = pd.to_datetime(df[time_col], errors='coerce')

# Derive month name, weekday name and hour from the trip start.
start_dt = df['Start Time'].dt
df['month'] = start_dt.strftime('%B')
df['day_of_week'] = start_dt.day_name()
# Nullable Int64 keeps the hours integral while the NaT row becomes <NA>.
df['hour'] = start_dt.hour.astype('Int64')

print(df)

# mode() leaves out missing values and returns the most frequent value(s) in
# sorted order; [0] picks the first of them.
most_common_month = df['month'].mode()[0]
most_common_day = df['day_of_week'].mode()[0]
most_common_hour = df['hour'].mode()[0]

print(f"The most common month is: {most_common_month}")
print(f"The most common day of the week is: {most_common_day}")
print(f"The most common start hour is: {most_common_hour}")


           Start Time            End Time    month day_of_week  hour
0 2017-01-01 09:07:57 2017-01-01 09:20:53  January      Sunday     9
1 2017-01-02 09:07:57                 NaT  January      Monday     9
2                 NaT 2017-01-03 00:20:53      NaN         NaN  <NA>
The most common month is: January
The most common day of the week is: Monday
The most common start hour is: 9


In [38]:
# Station statistics on sample data with a gap: the second trip has no end station.
data = {
    'Start Station': ['Station A', 'Station B', 'Station A'],
    'End Station': ['Station B', None, 'Station B'],
}

# Build the frame from the sample above. Without this line the statistics below
# read whatever `df` a previously executed cell left behind, so the missing-value
# case is never exercised and a fresh top-to-bottom run fails on the column lookup.
df = pd.DataFrame(data)

# Display most commonly used start station
most_common_start_station = df['Start Station'].mode()[0]
print(f"The most commonly used start station is: {most_common_start_station}")

# Display most commonly used end station
# (mode() ignores the missing entry by default, so only real stations compete)
most_common_end_station = df['End Station'].mode()[0]
print(f"The most commonly used end station is: {most_common_end_station}")

# Display most frequent combination of start station and end station trip
# (groupby drops rows whose key is missing by default, so the incomplete
# Station B trip is not counted as a pair)
most_common_trip = df.groupby(['Start Station', 'End Station']).size().idxmax()
print(f"The most frequent combination of start station and end station trip is: {most_common_trip[0]} to {most_common_trip[1]}")


The most commonly used start station is: Station A
The most commonly used end station is: Station B
The most frequent combination of start station and end station trip is: Station A to Station B


In [40]:
# Load the trip records from chicago.csv (a relative path, so it is resolved
# against the current working directory). This rebinds `df`, replacing the
# small sample frame previously bound to that name.
df = pd.read_csv('chicago.csv')

In [44]:
# Summarise the loaded frame: row count, column names, non-null counts, dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 300000 entries, 0 to 299999
Data columns (total 9 columns):
 #   Column         Non-Null Count   Dtype  
---  ------         --------------   -----  
 0   Unnamed: 0     300000 non-null  int64  
 1   Start Time     300000 non-null  object 
 2   End Time       300000 non-null  object 
 3   Trip Duration  300000 non-null  int64  
 4   Start Station  300000 non-null  object 
 5   End Station    300000 non-null  object 
 6   User Type      300000 non-null  object 
 7   Gender         238948 non-null  object 
 8   Birth Year     238981 non-null  float64
dtypes: float64(1), int64(2), object(6)
memory usage: 20.6+ MB


In [46]:
# Count the missing values in each column
missing_count = df.isna().sum()
print(missing_count)


Unnamed: 0           0
Start Time           0
End Time             0
Trip Duration        0
Start Station        0
End Station          0
User Type            0
Gender           61052
Birth Year       61019
dtype: int64
