In [1]:
import pandas as pd
import numpy as np

In [2]:
data = {
    "Name": ["Tanmay", "Akshay", "Aditya", "Sujit", "Om"],
    "Age": [25, 30, np.nan, 35, 40],
    "Salary": [50000, 0, 60000, 0, 70000],
    "Department": ["HR", "Finance", "IT", "Marketing", None],
}

In [5]:
df = pd.DataFrame(data)
df.to_csv("example.csv", index=False)
df.to_excel("example.xlsx", index=False)


In [6]:
df_csv = pd.read_csv("example.csv")
df_xlsx = pd.read_excel("example.xlsx")


In [7]:
print("Shape of CSV Data:", df_csv.shape)
print("Shape of XLSX Data:", df_xlsx.shape)


Shape of CSV Data: (5, 4)
Shape of XLSX Data: (5, 4)


In [8]:
print("\nMissing Values:\n", df_csv.isnull().sum())



Missing Values:
 Name          0
Age           1
Salary        0
Department    1
dtype: int64


In [9]:
print("\nData Types:\n", df_csv.dtypes)


Data Types:
 Name           object
Age           float64
Salary          int64
Department     object
dtype: object


In [10]:
print("\nCount of Zeros in each column:\n", (df_csv == 0).sum())



Count of Zeros in each column:
 Name          0
Age           0
Salary        2
Department    0
dtype: int64


In [11]:
selected_data = df_csv.loc[1:3, ["Name", "Age"]]
print("\nSelected Data:\n", selected_data)



Selected Data:
      Name   Age
1  Akshay  30.0
2  Aditya   NaN
3   Sujit  35.0


In [12]:
sorted_data = df_csv.sort_values(by="Age", ascending=True)
print("\nSorted Data by Age:\n", sorted_data)



Sorted Data by Age:
      Name   Age  Salary Department
0  Tanmay  25.0   50000         HR
1  Akshay  30.0       0    Finance
3   Sujit  35.0       0  Marketing
4      Om  40.0   70000        NaN
2  Aditya   NaN   60000         IT


In [13]:
print("\nDescriptive Statistics:\n", df_csv.describe())



Descriptive Statistics:
              Age        Salary
count   4.000000      5.000000
mean   32.500000  36000.000000
std     6.454972  33615.472628
min    25.000000      0.000000
25%    28.750000      0.000000
50%    32.500000  50000.000000
75%    36.250000  60000.000000
max    40.000000  70000.000000


In [14]:
print("\nUnique Values per Column:\n", df_csv.nunique())
print("\nFormat of 'Age':", df_csv["Age"].dtype)



Unique Values per Column:
 Name          5
Age           4
Salary        4
Department    4
dtype: int64

Format of 'Age': float64


In [15]:
df_csv["Age"] = df_csv["Age"].fillna(0).astype(int)
print("\nUpdated Age Column Data Types:\n", df_csv.dtypes)


Updated Age Column Data Types:
 Name          object
Age            int64
Salary         int64
Department    object
dtype: object
