## Task 1: Setup and DataFrame Creation

In [None]:
# pandas installation code
!pip install pandas

In [None]:
# Import Libraries (Pandas)
import pandas as pd # "pd" is the conventional short alias; any valid name would work

In [None]:
# Build a DataFrame column by column: every dictionary key becomes a column
# label and its list supplies that column's values.
std_details = dict(
    Name=["Rajib", "Ram", "Tanaka"],
    Age=[25, 30, 45],
    Country=["Nepal", "India", "Japan"],
)
print(type(std_details))
std_details_dataframe = pd.DataFrame(data=std_details)
print(std_details_dataframe)

In [None]:
# Build a DataFrame row by row: every dictionary in the list becomes one row,
# and the dictionary keys become the column labels.
std_data = [
    {"Name": "Zoro", "Age": 20, "Country": "Japan"},
    {"Name": "Ramesh", "Age": 35, "Country": "Bangladesh"},
    {"Name": "Sushil", "Age": 27, "Country": "Nepal"},
]
print(type(std_data))
std_data_dataframe = pd.DataFrame(data=std_data)
print(std_data_dataframe)


In [None]:
# Read data from a CSV file.
# pd.read_csv already returns a DataFrame, so no extra conversion is needed.
csv_data = pd.read_csv("messed_dataset.csv")
print(type(csv_data))
# Work on an independent copy: later cells modify csv_dataframe in place
# (fillna, rename, new columns), and a copy guarantees those edits can never
# leak back into the freshly loaded csv_data.
csv_dataframe = csv_data.copy()
print(csv_dataframe)


## Task 2: Viewing and Inspecting Data

In [None]:
# Preview both ends of the CSV data: head() gives the first 5 rows and
# tail() the last 5 rows (5 is the default for both).
for preview in (csv_dataframe.head(), csv_dataframe.tail()):
    print(preview)

In [None]:
# Summary of the DataFrame: column names, non-null counts, dtypes, memory usage.
# info() writes its report straight to stdout and returns None, so wrapping it
# in print() would only append a stray "None" line after the report.
csv_dataframe.info()

In [None]:
# describe() builds a table of summary statistics (count, mean, std, min,
# quartiles, max); on a mixed-type frame only numeric columns are included by default.
print(csv_dataframe.describe()) # Statistics

In [None]:
# Dimensions of the DataFrame, reported as a (rows, columns) tuple
row_count, column_count = csv_dataframe.shape
print((row_count, column_count))

In [None]:
# columns in the dataframe
# .columns is the Index object that holds the column labels
print(csv_dataframe.columns)

## Task 3: Selection and Indexing

In [None]:
# Pick out one column; selecting a single label yields a Series
email_column = csv_dataframe.loc[:, "Email"]
print(email_column)

In [None]:
# select multiple columns
# Passing a list of labels (note the double brackets) returns a DataFrame
# containing just those columns, in the order listed.
csv_dataframe[["Name", "Country"]]

In [None]:
# Selecting Rows by index
# iloc is position-based and zero-indexed, so 5 refers to the sixth row
csv_dataframe.iloc[5]

In [None]:
# Positional slice: rows at positions 0 through 4 (the end position 5 is excluded)
csv_dataframe.iloc[:5]

In [None]:
# Positional slice: rows at positions 5 through 9 (the end position 10 is excluded)
csv_dataframe.iloc[5:10]

In [None]:
# Selecting Rows by labels
# loc takes (row label, column label); this reads the "Name" value of the row labelled 9
csv_dataframe.loc[9,'Name']

In [None]:
# Label slices include the end label: "Name" for every row up to AND including label 9
csv_dataframe.loc[:9, "Name"]

In [None]:
# Label slice with both ends inclusive: "Name" for the rows labelled 5 through 9
csv_dataframe.loc[5:9, "Name"]

In [None]:
# Combine a row label slice with a list of column labels to get a sub-DataFrame
csv_dataframe.loc[:9, ["Name","Age","Email"]]

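##### Note: `loc` is used for label-based selection and `iloc` is used for integer position-based selection. A `loc` slice includes its end label, whereas an `iloc` slice excludes its end position.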

## Task 4: Handling Missing Data

In [None]:
# Locate the missing data
missing_mask = csv_dataframe.isnull()  # True wherever a value is missing
print(missing_mask)
print(missing_mask.sum())  # number of missing values in each column

In [None]:
# Drop rows with missing values
# read_csv already returns a DataFrame; take a copy so test_csv keeps the raw data.
test_csv = pd.read_csv("test.csv")
new_dframe = test_csv.copy()
print(new_dframe)
print(new_dframe.isnull())
# dropna() returns a new frame without any row that holds at least one missing value
new_dframe = new_dframe.dropna()
print(new_dframe) # the rows with null values have been removed

In [None]:
print(csv_dataframe.head().isnull())
# Fill the missing values; each dictionary maps a column label to the value
# used wherever that column is empty.
salary_default = {"Salary": 15000}  # a single column
other_defaults = {  # several columns at once
    "Name": "John",
    "Age": 20,
    "Email": "nic@gmail.com",
    "Country": "Nepal",
    "DateOfJoining": "2024-07-04",
}
for defaults in (salary_default, other_defaults):
    csv_dataframe.fillna(value=defaults, inplace=True)
print(csv_dataframe)

In [None]:
# Count the missing values per column again to check what is still empty
csv_dataframe.isnull().sum()

## Task 5: Data Operations

In [None]:
# Add a new column to the DataFrame
# A list assigned as a column must hold exactly one value per row, so the
# alternating labels are generated from the current row count instead of being
# hard-coded for the number of rows that happen to remain after the drop.
new_dframe["Gender"] = [("Male", "Female")[i % 2] for i in range(len(new_dframe))]
print(new_dframe)

In [None]:
# Add Gender to csv_dataframe
# Alternate the two labels by row position so the list always has exactly
# len(csv_dataframe) entries, including when the row count is odd (a plain
# `["Male", "Female"] * (len // 2)` repeat would then be one value short and
# raise ValueError on assignment).
csv_dataframe["Gender"] = [("Male", "Female")[i % 2] for i in range(len(csv_dataframe))]
print(csv_dataframe)

In [None]:
# Remove one column from the DataFrame (new_dframe is modified in place)
new_dframe.drop(labels="Country", axis="columns", inplace=True)
print(new_dframe)

In [None]:
# Remove several columns from the DataFrame in a single call
columns_to_remove = ["Email", "Age"]
new_dframe.drop(columns=columns_to_remove, inplace=True)
print(new_dframe)

In [None]:
# Rename a column: the mapping goes from the old label to the new one
new_dframe.rename(mapper={"Gender": "Sex"}, axis="columns", inplace=True)
print(new_dframe)

In [None]:
# Apply the same Gender -> Sex rename to the CSV data
column_renames = {"Gender": "Sex"}
csv_dataframe.rename(columns=column_renames, inplace=True)
print(csv_dataframe)

In [None]:
# Apply a function to every value of a column
def append_s(label):
    """Return the given label with an "s" appended."""
    return label + "s"


new_dframe["Sex"] = new_dframe["Sex"].apply(append_s)
print(new_dframe)

In [None]:
# Apply a function to every value of a column
def add_nic_prefix(name):
    """Return the given name with "NIC " placed in front of it."""
    return "NIC " + name


csv_dataframe["Name"] = csv_dataframe["Name"].apply(add_nic_prefix)
# Show both ends of the frame to confirm that every name received the prefix
for preview in (csv_dataframe.head(), csv_dataframe.tail()):
    print(preview)

## Task 6: GroupBy Operations

In [None]:
# Sample data for the GroupBy examples: one row per student with age and score
grade_dict = dict(
    Name=["Rajib", "Ram", "Tanaka", "So Hyun", "Min Ho"],
    Age=[25, 30, 45, 26, 42],
    Score=[85, 89, 58, 98, 68],
)
grp_dataFrame = pd.DataFrame(data=grade_dict)
print(grp_dataFrame)

In [None]:
# Split the rows into groups keyed by the values of the "Name" column.
# Printing the result shows only the GroupBy object itself, not the rows;
# the groups are materialised when an aggregation or iteration is requested.
grped_data = grp_dataFrame.groupby(by="Name")
print(grped_data)

In [None]:
# Average "Score" within each group
score_by_group = grped_data["Score"]
data_mean = score_by_group.agg("mean")
print(data_mean)

In [None]:
# summary
# describe() on a GroupBy object produces the summary statistics separately for each group
summary = grped_data.describe()
print(summary)

In [None]:
# Iterate over the groups and display each group's name and data.
# The trailing empty string adds a blank line that separates the groups.
for label, rows in grped_data:
    print(f"Group Name: {label}", rows, "", sep="\n")

## Task 7: Merging and Joining DataFrames

In [None]:
# Two frames describing the same three students. They share the row labels
# (S0-S2) and the "std_id" key column, which the merge/join examples rely on.
student_ids = ["std_0", "std_1", "std_2"]
row_labels = ["S0", "S1", "S2"]

df1 = pd.DataFrame(
    data={
        "Name": ["John", "Drew", "Seth"],
        "Score": [90, 75, 88],
        "std_id": student_ids,
    },
    index=row_labels,
)

df2 = pd.DataFrame(
    data={
        "Age": [45, 35, 30],
        "Country": ["USA", "UK", "USA"],
        "std_id": student_ids,
    },
    index=row_labels,
)

In [None]:
# Combine the two frames on their shared "std_id" column (inner merge by default)
merged_df = df1.merge(df2, on="std_id")
print(merged_df)

In [None]:
# Join the frames on their row index (left join by default).
# Both frames carry a "std_id" column, so the suffixes keep the two copies apart.
indices_join_df = df1.join(other=df2, how="left", lsuffix='_left', rsuffix='_right')
print(indices_join_df)

## Task 8: Working with Dates and Times

In [None]:
# Add a datetime column: one consecutive calendar day per row, starting 2024-07-01
marked_dates = pd.date_range(start="2024-07-01", periods=len(merged_df), freq="D")
merged_df["Marked_Date"] = marked_dates
print(merged_df)

In [None]:
# Converting to a datetime
# pd.to_datetime converts a column to datetime64. When Marked_Date was created
# with pd.date_range it is already datetime64, so this call leaves the values
# unchanged; it matters for columns that hold date strings.
merged_df["Marked_Date"] = pd.to_datetime(merged_df["Marked_Date"])
print(merged_df)

In [None]:
# Print the column summary to check the dtype of each column (e.g. Marked_Date)
merged_df.info()

In [None]:
# Build a datetime column from timestamp strings via DatetimeIndex
birth_timestamps = [
    "1977-04-23 08:46:12",
    "1985-06-06 13:32:56",
    "1986-05-28 19:56:13",
]
df1["DOB"] = pd.DatetimeIndex(birth_timestamps)
print(df1)

In [None]:
# Print the column summary to check the dtype of each column (e.g. DOB)
df1.info()

## Task 9: Input and Output

In [None]:
# Read values from a CSV file into a DataFrame.
# read_csv already returns a DataFrame, so no extra conversion is needed;
# the copy keeps csv_to_df independent of the freshly loaded test_csv.
test_csv = pd.read_csv("test.csv")
csv_to_df = test_csv.copy()
print(csv_to_df)

In [None]:
# DataFrame to a CSV file
## Build a new DataFrame from scratch
person_details = pd.DataFrame(
    data=dict(
        std_id=["std_0", "std_1", "std_2"],
        Name=["John", "Drew", "Seth"],
        Score=[90, 75, 88],
        Age=[45, 35, 30],
        Country=["USA", "UK", "USA"],
    )
)

# Write the DataFrame to a CSV file; index=False keeps the row index out of the file
person_details.to_csv(path_or_buf="personal_details.csv", index=False)

## Write the DataFrame that was read from a CSV earlier back out to a new CSV file
csv_to_df.to_csv(path_or_buf="newTest.csv", index=False)


In [None]:
# Read an Excel file.
# pd.read_excel already returns a DataFrame (the first sheet by default), so
# no extra conversion is needed; the copy keeps excel_to_df independent of excel_read.
excel_read = pd.read_excel("SaleData.xlsx")
excel_to_df = excel_read.copy()
print(excel_to_df)

In [None]:
# Write the person_details DataFrame to an Excel file (sheet "details").
# index=False matches the CSV export of the same frame and keeps the row
# index from being written as an extra unnamed first column.
person_details.to_excel("person_details.xlsx", sheet_name="details", index=False)

## Task 10: Visualization

#### Simple Plot using matplotlib

In [None]:
# Install matplotlib
!pip install matplotlib

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Simple data: the first six prime numbers on x against the first six even
# numbers on y, so the values agree with the axis labels below
# (1 is not a prime number; 2 is the first one).
x = [2,3,5,7,11,13]
y = [2,4,6,8,10,12]

# Plot the data as a line chart
plt.plot(x,y)

# Customize labels and title
plt.xlabel("Prime Numbers")
plt.ylabel("Even Numbers")
plt.title("Simple Chart")
plt.show()

#### Bar Plot using Seaborn

In [None]:
# Install seaborn
!pip install seaborn

In [None]:
# Import seaborn library
import seaborn as sns

In [None]:
# Data: language names paired with their popularity scores
languages = ['Python', 'Java', 'JavaScript', 'C#', 'C++']
popularity = [70, 60, 50, 40, 30]

# Put both lists into one DataFrame so seaborn can refer to them by column name
data_df = pd.DataFrame(dict(Languages=languages, Popularity=popularity))

# Draw the bar plot; barplot returns the Axes it drew on
ax = sns.barplot(data=data_df, x="Languages", y="Popularity")

# Customize labels and title on that Axes
ax.set_xlabel('Programming Languages')
ax.set_ylabel('Popularity')
ax.set_title('Bar Plot using Seaborn')

# Show plot
plt.show()