# Pandas

In [2]:
import pandas as pd

## 1. Create dataset as a dictionary

In [3]:
# Column-oriented data: every keyword becomes a column name and every list
# holds that column's values, one entry per row.
data = dict(
    Name=['Alice', 'Bob', 'Charlie', 'David', 'Eve'],
    Math=[89, 78, 67, 85, 92],
    English=[72, 88, 79, 65, 84],
    Science=[90, 82, 87, 78, 88],
    Age=[15, 16, 15, 17, 16],
)

# Build a DataFrame from the dictionary and show it as plain text
df = pd.DataFrame(data=data)
print(df)

      Name  Math  English  Science  Age
0    Alice    89       72       90   15
1      Bob    78       88       82   16
2  Charlie    67       79       87   15
3    David    85       65       78   17
4      Eve    92       84       88   16


## 2. Reading Data:


In [None]:
# NOTE: the paths below are placeholders; substitute the real location of
# your own file before running this cell.

# From a CSV file:
df = pd.read_csv('...floder_name/.../file_name.csv')

# From an Excel file (each assignment replaces df, so keep only the reader
# that matches your file type):
df = pd.read_excel('...floder_name/.../file_name.xlsx')

## 3. Viewing Data:

In [None]:
# Peek at the top of the frame: the first 10 rows
print(df.head(n=10))

# Peek at the bottom of the frame: the last 10 rows
print(df.tail(n=10))

## 4. Basic Data Information

In [None]:
# Data types, non-null values, and memory usage
# (info() prints its report itself, so it needs no print()):
df.info()

# Summary statistics.
# print() is needed here: a notebook cell echoes only its LAST expression,
# so a bare call in the middle of the cell would display nothing.
print(df.describe())

# How many times each distinct value occurs in column_name:
print(df['column_name'].value_counts())

# The distinct values found in column_name:
print(df['column_name'].unique())

## 5. Selecting Data

In [None]:
# Pick out a single column by its name:
col = df['column_name']

# Pick out several columns at once by passing a list of names:
wanted_columns = ['col_name1', 'col_name2']
cols = df[wanted_columns]

# Keep only the rows for which a condition holds
# (the comparison yields a boolean mask used to index df):
above_value = df['column_name'] > value
filtered = df[above_value]

# Using "&" for multiple "and" conditions in Pandas
# (each condition is built as its own mask, then the masks are combined):
first_at_least_9 = df['col_name1'] >= 9
second_is_11 = df['col_name2'] == 11
df[first_at_least_9 & second_is_11]

# Using "|" for multiple "or" conditions in Pandas:
second_is_3 = df['col_name2'] == 3
df[first_at_least_9 | second_is_3]

## 6. Modifying Data

In [None]:
# Adding a new column (element-wise sum of two existing columns):
df['new_col'] = df['col1'] + df['col2']

# Dropping a column (axis=1 means "columns").
# drop() returns a new DataFrame; assigning it back is preferred over
# inplace=True, which prevents method chaining and hides the mutation.
df = df.drop('column_name', axis=1)

## 7. Handling Missing Data:

In [None]:
# NOTE: dropna() and fillna() return a NEW object and leave df untouched.
# Assign the result (as in the first and fourth examples) to keep it.

# Dropping rows with missing values (result assigned back to df):
df = df.dropna()

# Drop all rows/index entries with missing values, axis spelled out
# (axis=0 -> rows, which is also the default):
df.dropna(axis=0)

# Drop all columns with missing values (axis=1 -> columns):
df.dropna(axis=1)

# Filling missing values in one column:
# assign the filled column back. Calling fillna(..., inplace=True) on the
# selection df['column_name'] is chained assignment; it may modify a temporary
# object instead of df, and never updates df when Copy-on-Write is enabled.
df['column_name'] = df['column_name'].fillna(value)

# Example, replace all missing values with 0 (returns a new DataFrame):
df.fillna(0)

# Fill NaNs in column 'col_name1' with its mean (returns a new Series):
df['col_name1'].fillna(df['col_name1'].mean())

## 8. Saving Data:

In [None]:
# Write the frame to a CSV file; index=False leaves the row index out of the file
df.to_csv(path_or_buf='output.csv', index=False)

# Write the frame to an Excel workbook, again without the row index
df.to_excel(excel_writer='output.xlsx', index=False)