# Data Science Basics - Pandas

In [None]:
import pandas as pd

## Loading Data

In [None]:
df = pd.read_csv("data/pokemon.csv")

In [None]:
# Print top 5 rows
df.head(5)

In [None]:
# Print column names
df.columns

In [None]:
# Read specific column
print(df["Name"])

# Read specific column in a range
print(df["Name"][0:5])

In [None]:
# Read specific multiple columns
print(df[["Name", "Type 1", "Type 2"]])

# Read specific multiple columns in a range
print(df[["Name", "Type 1", "Type 2"]][0:5])

In [None]:
# Get specific row
print(df.iloc[1])

# Get specific rows in range
print(df.iloc[1:4])

In [None]:
# Get specific location
print(df.iloc[2, 1])

In [None]:
# Walk the frame row by row; iterrows() yields (index, row) pairs
for pair in df.iterrows():
    print(*pair)

In [None]:
# Iterate over a single column.
# Series.items() yields (index, value) pairs directly, so no per-row Series
# has to be built the way iterrows() does; the printed output is the same.
for index, name in df["Name"].items():
    print(index, name)

In [None]:
# Select the rows whose "Type 1" equals "Fire"
df[df["Type 1"].eq("Fire")]

## Sorting/Describing

In [None]:
# Describe to get statistics information of the dataset
df.describe()

In [None]:
# Sort ascending by values
df.sort_values("Name")

In [None]:
# Sort descending by values
df.sort_values("Name", ascending=False)

In [None]:
# Sort descending by multiple values
df.sort_values(["Type 1", "HP"], ascending=False)

In [None]:
# Sort descending by multiple values and different order
df.sort_values(["Type 1", "HP"], ascending=[1, 0])

## Making changes to the data

In [None]:
# Build "Total" by adding the six stat columns one after another
# (the simplest approach, though not the fastest)
df["Total"] = (
    df["HP"]
    .add(df["Attack"])
    .add(df["Defense"])
    .add(df["Sp. Atk"])
    .add(df["Sp. Def"])
    .add(df["Speed"])
)
df.head()

In [None]:
# Drop a column
df = df.drop(columns=["Total"])
df.head(5)

In [None]:
# Other way of manipulating dataset. Axis 1 = Horizontally, Axis 0 = Vertically.
df["Total"] = df.iloc[:, 4:10].sum(axis=1)
df.head(5)

In [None]:
# Rearranging the dataset
# df = df[["Name", "Type 1", "Type 2"]] - Using this will rearrange the dataset as mentioned.

# Better way of rearranging it: move the last column to position 4, directly
# after the first four columns, and keep everything else in its current order.
# The remainder is sliced as cols[4:-1] rather than a hard-coded cols[4:12],
# so no column is silently dropped when the frame has more than 13 columns.
cols = df.columns.tolist()
df = df[cols[:4] + [cols[-1]] + cols[4:-1]]
df.head(5)

## Saving data

In [None]:
## Saving the data with index
df.to_csv("data/modified_pokemon.csv")

In [None]:
## Saving the data without index
df.to_csv("data/modified_pokemon.csv", index=False)

In [None]:
## Saving the data using different seperator
df.to_csv("data/modified_pokemon.csv", index=False, sep="\t")

In [None]:
# Saving in excel format
df.to_excel("data/modified_pokemon.xlsx", index=False)

## Filtering Data

In [None]:
# Single condition: keep only the rows whose "Type 1" is "Grass".
# head() is chained onto the filter so the filtered rows are what gets shown;
# a separate df.head(5) would display the unfiltered frame instead.
df.loc[df["Type 1"] == "Grass"].head(5)

In [None]:
# Multiple conditions combined with AND (&): "Type 1" is "Grass" and
# "Type 2" is "Poison".
# head() is chained onto the filter so the filtered rows are what gets shown;
# a separate df.head(5) would display the unfiltered frame instead.
df.loc[(df["Type 1"] == "Grass") & (df["Type 2"] == "Poison")].head(5)

In [None]:
# Multiple conditions combined with OR (|): "Type 1" is "Grass" or
# "Type 2" is "Poison".
# head() is chained onto the filter so the filtered rows are what gets shown;
# a separate df.head(5) would display the unfiltered frame instead.
df.loc[(df["Type 1"] == "Grass") | (df["Type 2"] == "Poison")].head(5)

In [None]:
# Keep the filtered rows in a new frame and renumber them from zero;
# reset_index() without drop=True also keeps the old labels as a column
new_df = df[df["Type 1"].eq("Grass") | df["Type 2"].eq("Poison")].reset_index()
new_df.head()

In [None]:
# Renumber the rows and drop the old index labels instead of keeping them
# as a column.
# reset_index(..., inplace=True) returns None, so assigning its result would
# turn new_df into None and make new_df.head() fail; the returned copy is
# used instead.
new_df = new_df.reset_index(drop=True)
new_df.head(5)

In [None]:
# Filter using contain
a = df.loc[df["Name"].str.contains("Mega")]
a.head(5)

In [None]:
# Filter using contain
b = df.loc[~df["Name"].str.contains("Mega")]
b.head(5)

In [None]:
# Filter using Regex
import re

c = df.loc[df["Type 1"].str.contains("Fire|Grass", regex=True)]
c.head(5)

In [None]:
# Filter using Regex and Ignoring Case
import re

d = df.loc[df["Type 1"].str.contains("fire|grass", flags=re.I, regex=True)]
d.head(5)

In [None]:
import re

e = df.loc[df["Name"].str.contains("^pi[a-z]", flags=re.I, regex=True)]
e.head(5)

## Conditional Updates

In [None]:
# Conditional update of one column: every "Fire" in "Type 1" becomes "Flamer"
df.loc[df["Type 1"].eq("Fire"), "Type 1"] = "Flamer"
df

In [None]:
# Multiple column example. Modifying "Generation" and "Legendary" based on condition.
# The two columns are cast to object first so a string can be stored in them
# regardless of their original dtype; writing a value of an incompatible dtype
# through .loc relies on implicit upcasting, which pandas has deprecated.
df = df.astype({"Generation": object, "Legendary": object})
df.loc[df["Total"] > 500, ["Generation", "Legendary"]] = "Dummy Value"
df

In [None]:
# Multiple column example. Modifying "Generation" and "Legendary" based on condition to different values.
# The two columns are cast to object first so strings can be stored in them
# regardless of their original dtype; writing a value of an incompatible dtype
# through .loc relies on implicit upcasting, which pandas has deprecated.
df = df.astype({"Generation": object, "Legendary": object})
# The list supplies one value per target column, in the order listed.
df.loc[df["Total"] > 500, ["Generation", "Legendary"]] = [
    "Dummy Value 1",
    "Dummy Value 2",
]
df

## Average Statistics

In [None]:
# Single grouping: mean of every numeric column per "Type 1".
# numeric_only=True skips text columns such as "Name"; without it, recent
# pandas versions raise a TypeError when asked to average strings.
df.groupby(["Type 1"]).mean(numeric_only=True)

In [None]:
# Mean of every numeric column per "Type 1", ordered from the highest to the
# lowest average "Defense". numeric_only=True skips text columns such as
# "Name", which recent pandas versions refuse to average.
df.groupby(["Type 1"]).mean(numeric_only=True).sort_values("Defense", ascending=False)

In [None]:
# Group by the ("Type 1", "Type 2") pair and count the entries in each group
df.groupby(by=["Type 1", "Type 2"]).count()

## Working with big datasets

In [None]:
# Reading in chunks
for df in pd.read_csv("data/pokemon.csv", chunksize=5):
    print("CHUNK DF")
    print(df)

In [None]:
# Creating new data frame
new_df = pd.DataFrame(columns=df.columns)
new_df

In [None]:
# Count the entries per "Type 1" within every five-row chunk, then append all
# per-chunk results to new_df with a single concat call. Concatenating inside
# the loop would re-copy the accumulated frame on every iteration.
# A type that occurs in several chunks appears as several rows in the result.
chunk_counts = [
    chunk.groupby(["Type 1"]).count()
    for chunk in pd.read_csv("data/pokemon.csv", chunksize=5)
]
new_df = pd.concat([new_df, *chunk_counts])

new_df