# General Practice with Pandas

In [1]:
import pandas as pd

# Load the Titanic passenger table from the data directory next to the notebook.
df = pd.read_csv(filepath_or_buffer='../data/titanic_dataset.csv')

# Preview the first five rows (head() defaults to n=5).
print(df.head(n=5))

   PassengerId  Survived  Pclass  \
0            1         0       3   
1            2         1       1   
2            3         1       3   
3            4         1       1   
4            5         0       3   

                                                Name     Sex   Age  SibSp  \
0                            Braund, Mr. Owen Harris    male  22.0      1   
1  Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1   
2                             Heikkinen, Miss. Laina  female  26.0      0   
3       Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1   
4                           Allen, Mr. William Henry    male  35.0      0   

   Parch            Ticket     Fare Cabin Embarked  
0      0         A/5 21171   7.2500   NaN        S  
1      0          PC 17599  71.2833   C85        C  
2      0  STON/O2. 3101282   7.9250   NaN        S  
3      0            113803  53.1000  C123        S  
4      0            373450   8.0500   NaN        S  


In [2]:
# Select passengers who survived (Survived == 1) and are older than 25,
# keeping only the Survived and Age columns.
is_survivor = df["Survived"] == 1
is_over_25 = df["Age"] > 25
filtered = df.loc[is_survivor & is_over_25, ["Survived", "Age"]]
print(filtered)

# The number of matching passengers equals the number of rows in the selection.
count = len(filtered)
print(f"Total number of survived passengers that are older than 25 yeas old are: {count}")

    Survived   Age
1          1  38.0
2          1  26.0
3          1  35.0
8          1  27.0
11         1  58.0
15         1  55.0
21         1  34.0
23         1  28.0
25         1  38.0
52         1  49.0
53         1  29.0
61         1  38.0
66         1  29.0
74         1  32.0
79         1  30.0
81         1  29.0
85         1  33.0
98         1  34.0
Total number of survived passengers that are older than 25 yeas old are: 18


In [3]:
# Show Name, Sex, Age and Survived for the first 10 passengers.
# No explicit alignment is requested; the printed text table below is
# right-aligned by pandas' plain-text repr.
preview_columns = ["Name", "Sex", "Age", "Survived"]
print(df.head(10)[preview_columns])

                                                Name     Sex   Age  Survived
0                            Braund, Mr. Owen Harris    male  22.0         0
1  Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0         1
2                             Heikkinen, Miss. Laina  female  26.0         1
3       Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0         1
4                           Allen, Mr. William Henry    male  35.0         0
5                                   Moran, Mr. James    male   NaN         0
6                            McCarthy, Mr. Timothy J    male  54.0         0
7                     Palsson, Master. Gosta Leonard    male   2.0         0
8  Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)  female  27.0         1
9                Nasser, Mrs. Nicholas (Adele Achem)  female  14.0         1


In [4]:
# Aggregation functions
# Column-wise mean over the numeric columns only. The same pattern works
# with min, max, count and sum.
numeric_means = df.mean(numeric_only=True)
print(numeric_means)

# Total of the Survived column (the values shown in the preview are 0/1 flags,
# so this total is the number of survivors).
survivor_total = df['Survived'].sum()
print(f"The sum of all those who survived: {survivor_total}")

PassengerId    50.500000
Survived        0.410000
Pclass          2.400000
Age            27.465769
SibSp           0.730000
Parch           0.440000
Fare           29.517625
dtype: float64
The sum of all those who survived: 41


In [5]:
# Keep only the survivors, then group them by (Survived, Sex) and compute
# the passenger count and the mean age for each group.
# Note: the aggregated "Sex" column holds the per-group row count, not a sex label.
survivors = df.loc[df["Survived"] == 1]
group = (
    survivors
    .groupby(["Survived", "Sex"])
    .agg({"Sex": "count", "Age": "mean"})
)
print(group)

                 Sex        Age
Survived Sex                   
1        female   31  26.840000
         male     10  24.471667


In [6]:
# Rows that contain at least one NaN value.
# print() truncates long frames; print(nan_rows.to_string()) would show every row.
row_has_nan = df.isna().any(axis="columns")
nan_rows = df[row_has_nan]
print(nan_rows)

# One boolean per column: True when that column contains at least one NaN value.
nan_columns = df.isna().any(axis="index")
print(nan_columns)

    PassengerId  Survived  Pclass                                  Name  \
0             1         0       3               Braund, Mr. Owen Harris   
2             3         1       3                Heikkinen, Miss. Laina   
4             5         0       3              Allen, Mr. William Henry   
5             6         0       3                      Moran, Mr. James   
7             8         0       3        Palsson, Master. Gosta Leonard   
..          ...       ...     ...                                   ...   
93           94         0       3               Dean, Mr. Bertram Frank   
94           95         0       3                     Coxon, Mr. Daniel   
95           96         0       3           Shorney, Mr. Charles Joseph   
98           99         1       2  Doling, Mrs. John T (Ada Julia Bone)   
99          100         0       2                     Kantor, Mr. Sinai   

       Sex   Age  SibSp  Parch            Ticket     Fare Cabin Embarked  
0     male  22.0      1 