In [None]:
# 📥 Reading CSV Files with Pandas

# CSV (Comma-Separated Values) files are one of the most common formats for storing tabular data.

# Pandas makes it easy to load CSV files into a DataFrame for analysis using `pd.read_csv()`.

In [8]:
import pandas as pd

# Load the dataset into a DataFrame (swap in your own file path or URL here)
df = pd.read_csv('train.csv')

# Preview the first 5 rows
print(df.head(n=5))


### 📌 Common Parameters in `read_csv()`:
# - `sep` → field delimiter (default is comma)  
# - `header` → row to use as column names  
# - `names` → custom column names  
# - `index_col` → column to use as index  
# - `usecols` → list of columns to read  
# - `nrows` → number of rows to read  
# - `dtype` → data types for columns  

   PassengerId  Survived  Pclass  \
0            1         0       3   
1            2         1       1   
2            3         1       3   
3            4         1       1   
4            5         0       3   

                                                Name     Sex   Age  SibSp  \
0                            Braund, Mr. Owen Harris    male  22.0      1   
1  Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1   
2                             Heikkinen, Miss. Laina  female  26.0      0   
3       Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1   
4                           Allen, Mr. William Henry    male  35.0      0   

   Parch            Ticket     Fare Cabin Embarked  
0      0         A/5 21171   7.2500   NaN        S  
1      0          PC 17599  71.2833   C85        C  
2      0  STON/O2. 3101282   7.9250   NaN        S  
3      0            113803  53.1000  C123        S  
4      0            373450   8.0500   NaN        S  


In [14]:
# Load just the "Name" and "Age" columns
df_subset = pd.read_csv(
    'train.csv',
    usecols=["Name", "Age"],
)
print(df_subset.head(n=5))

# Load the full file with "PassengerId" as the row index
df_indexed = pd.read_csv(
    'train.csv',
    index_col="PassengerId",
)
print(df_indexed.head(n=5))


                                                Name   Age
0                            Braund, Mr. Owen Harris  22.0
1  Cumings, Mrs. John Bradley (Florence Briggs Th...  38.0
2                             Heikkinen, Miss. Laina  26.0
3       Futrelle, Mrs. Jacques Heath (Lily May Peel)  35.0
4                           Allen, Mr. William Henry  35.0
             Survived  Pclass  \
PassengerId                     
1                   0       3   
2                   1       1   
3                   1       3   
4                   1       1   
5                   0       3   

                                                          Name     Sex   Age  \
PassengerId                                                                    
1                                      Braund, Mr. Owen Harris    male  22.0   
2            Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0   
3                                       Heikkinen, Miss. Laina  female  26.0   
4             

In [None]:
### ⚠️ Handling Errors

# You can avoid parsing errors using:

# - `error_bad_lines=False` → skip bad lines (deprecated in pandas 1.3 and removed in 2.0)
# - `on_bad_lines='skip'` → the replacement option: skip bad lines (pandas 1.3+)  
# - `na_values` → define what counts as missing

In [20]:
# Skip malformed lines rather than failing on them, and treat the listed
# placeholder strings as missing values
df_clean = pd.read_csv(
    'train.csv',
    on_bad_lines='skip',
    na_values=["?", "n/a", "NA"],
)
# Keep the preview as the cell's last expression so the notebook displays it
df_clean.head(n=5)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [None]:
# ✅ Use `pd.read_csv()` to load data from CSV files  
# ✅ Many options exist for handling real-world messy files  
# ✅ Next: Indexing and Slicing rows/columns