In [1]:
pip install pandas


Note: you may need to restart the kernel to use updated packages.


### Data Structures in Pandas
### a) Series: A 1D array similar to a list or NumPy array.

In [10]:
import pandas as pd

# Create a simple series
s = pd.Series([10, 20, 30, 40])
print(s)

0    10
1    20
2    30
3    40
dtype: int64


### DataFrame: A 2D table with labeled rows and columns.

In [11]:
data = {
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [24, 27, 22],
    'City': ['New York', 'Los Angeles', 'Chicago']
}

df = pd.DataFrame(data)
print(df)

      Name  Age         City
0    Alice   24     New York
1      Bob   27  Los Angeles
2  Charlie   22      Chicago


In [5]:
df

Unnamed: 0,Name,Age,City
0,Alice,24,New York
1,Bob,27,Los Angeles
2,Charlie,22,Chicago


### Indexing and Selecting Data
### Selecting Columns

In [12]:
# Select a single column
print(df['Name'])

# Select multiple columns
print(df[['Name', 'Age']])

0      Alice
1        Bob
2    Charlie
Name: Name, dtype: object
      Name  Age
0    Alice   24
1      Bob   27
2  Charlie   22


In [13]:
df[["Name","Age"]]

Unnamed: 0,Name,Age
0,Alice,24
1,Bob,27
2,Charlie,22


### Selecting Rows by Index

In [14]:
# Select first two rows
print(df.iloc[:2])

# Select rows by label index
print(df.loc[1])

    Name  Age         City
0  Alice   24     New York
1    Bob   27  Los Angeles
Name            Bob
Age              27
City    Los Angeles
Name: 1, dtype: object


### Boolean Indexing

In [16]:
print(df[df['Age'] > 23])

    Name  Age         City
0  Alice   24     New York
1    Bob   27  Los Angeles


In [17]:
df['Age'] > 23

0     True
1     True
2    False
Name: Age, dtype: bool

### Groupby Operations
### Pandas groupby() is used to group data by a certain column and apply aggregate functions.

In [18]:
data = {
    'Department': ['HR', 'HR', 'IT', 'IT', 'Finance'],
    'Employee': ['Alice', 'Bob', 'Charlie', 'David', 'Eve'],
    'Salary': [50000, 52000, 60000, 62000, 58000]
}
df = pd.DataFrame(data)

# Group by Department and calculate the mean salary
grouped = df.groupby('Department')['Salary'].mean()
print(grouped)

Department
Finance    58000.0
HR         51000.0
IT         61000.0
Name: Salary, dtype: float64


### Pivot Table
### Pivot tables allow you to rearrange data based on unique values in a column.

In [19]:
data = {
    'City': ['New York', 'Los Angeles', 'New York', 'Chicago'],
    'Year': [2019, 2019, 2020, 2020],
    'Population': [8.4, 3.9, 8.3, 2.7]
}
df = pd.DataFrame(data)

# Create a pivot table with 'Year' as columns
pivot = df.pivot_table(values='Population', index='City', columns='Year')
print(pivot)

Year         2019  2020
City                   
Chicago       NaN   2.7
Los Angeles   3.9   NaN
New York      8.4   8.3


### Create a DataFrame and Filter Employees with Salary > 60000.

In [20]:
import pandas as pd

data = {
    'Name': ['Alice', 'Bob', 'Charlie', 'David'],
    'Department': ['IT', 'HR', 'IT', 'Finance'],
    'Salary': [50000, 52000, 62000, 58000]
}
df = pd.DataFrame(data)

# Filter employees with Salary > 60000
high_salary = df[df['Salary'] > 60000]
print(high_salary)

      Name Department  Salary
2  Charlie         IT   62000


In [31]:
df.to_excel('df.xlsx')

In [32]:
df

Unnamed: 0,Name,Department,Salary
0,Alice,IT,50000
1,Bob,HR,52000
2,Charlie,IT,62000
3,David,Finance,58000


In [35]:
pd.read_excel("./df.xlsx")

Unnamed: 0.1,Unnamed: 0,Name,Department,Salary
0,0,Alice,IT,50000
1,1,Bob,HR,52000
2,2,Charlie,IT,62000
3,3,David,Finance,58000


### how to draw index preorder index while  reading excel file using pandas

In [39]:
import pandas as pd

# Read Excel file into DataFrame
df = pd.read_excel('df.xlsx')

# Add a preorder index starting from 1
df['Preorder_Index'] = range(1, len(df) + 1)

print(df)


   Unnamed: 0     Name Department  Salary  Preorder_Index
0           0    Alice         IT   50000               1
1           1      Bob         HR   52000               2
2           2  Charlie         IT   62000               3
3           3    David    Finance   58000               4


### Reading an Excel File

In [40]:
import pandas as pd

# Read the Excel file (use the appropriate sheet name or index)
df = pd.read_excel('df.xlsx', sheet_name='Sheet1')  # Or sheet_name=0 for the first sheet
print(df)

   Unnamed: 0     Name Department  Salary
0           0    Alice         IT   50000
1           1      Bob         HR   52000
2           2  Charlie         IT   62000
3           3    David    Finance   58000
