# Pandas tutorial

# SERIES

In [30]:
import pandas as pd

# Build a labelled Series from a plain list; the labels replace the default 0..n-1 index.
labels = ['Name', 'Age', 'Gender', 'Rating']
data = ['Steve', '35', 'Male', '3.5']
series = pd.Series(data=data, index=labels)
print(series)

Name      Steve
Age          35
Gender     Male
Rating      3.5
dtype: object


## Pandas Series in Python has three key properties:

![image.png](attachment:image.png)

In [29]:
import pandas as pd  
s = pd.Series([1, 2, 3.5, "text"])  
print(s)  


0       1
1       2
2     3.5
3    text
dtype: object


![image.png](attachment:image.png)

![image.png](attachment:image.png)

In [34]:
s = pd.Series([10, 20, 30])  
print(s)

0    10
1    20
2    30
dtype: int64


In [37]:
s[3] = 40  # Label 3 does not exist yet, so one might expect an error for a fixed-size Series...
# ...but it won't raise: pandas enlarges the Series automatically, so its size is not immutable.

In [38]:
print(s)

0    10
1    20
2    30
3    40
dtype: int64


In [43]:
import numpy as np

arr = np.array([1,2,3])
immutable_s=pd.Series(arr)
print(immutable_s)

immutable_s[3]=1000

print(immutable_s)
print(arr)

0    1
1    2
2    3
dtype: int32
0       1
1       2
2       3
3    1000
dtype: int64
[1 2 3]


In [47]:
import numpy as np

arr = np.array([10, 20, 30])
s = pd.Series(arr)
s[3] = 40  # Works, but creates a new object (not modifying the NumPy array)
print(s)


#modifying existing value - yes
s[3] = 40000  
print(s)

0    10
1    20
2    30
3    40
dtype: int64
0       10
1       20
2       30
3    40000
dtype: int64


In [53]:
s.drop(3)  # drop() returns a new Series; the result is discarded here
print(s)
# The original Series object s is unchanged, because drop() does not modify it in place.

0       10
1       20
2       30
3    40000
dtype: int64


In [49]:
# drop(3) removes the row with index label 3,
# but drop() returns a new Series rather than modifying s in place.
new_s=s.drop(3)
print(s)
print(new_s)

0       10
1       20
2       30
3    40000
dtype: int64
0    10
1    20
2    30
dtype: int64


![image.png](attachment:image.png)

![image.png](attachment:image.png)

In [45]:
s = pd.Series([10, 20, 30])  
s[1] = 50  # Updating value at index 1  
print(s)  


0    10
1    50
2    30
dtype: int64


In [None]:
# Constructor signature (for reference only, not executable code):
#   pandas.Series(data, index, dtype, name, copy)  -- every parameter is optional

In [3]:
#1. creating an empty series
import pandas as pd

empty_s = pd.Series()
print(empty_s)


Series([], dtype: float64)


  empty_s = pd.Series()


In [5]:
import pandas as pd

empty_s = pd.Series(dtype=object)
print(empty_s)


Series([], dtype: object)


In [9]:
#creating Series using np array
import pandas as pd
import numpy as np

# Source data: a 1-D NumPy array of odd numbers.
np_array = np.array([1, 3, 5, 7, 9])
print(np_array)

# Without an explicit index the Series gets the default 0..n-1 labels.
s = pd.Series(data=np_array)
print(s)

# Assigning labels: same values, addressed through custom string labels.
s2 = pd.Series(data=np_array, index=list('abcde'))
print('\n', s2)



[1 3 5 7 9]
0    1
1    3
2    5
3    7
4    9
dtype: int32

 a    1
b    3
c    5
d    7
e    9
dtype: int32


In [21]:
print(s2['a':'e':2])#start:stop:step

a    1
c    5
e    9
dtype: int32


In [20]:
print(s[0:2])

0    1
1    3
dtype: int32


#### Series - Methods

In [24]:
print(s.shape)

(5,)


In [25]:
print(s.ndim)

1


In [26]:
print(s.size)

5


In [27]:
print(s.index)

RangeIndex(start=0, stop=5, step=1)


In [28]:
print(s.values)

[1 3 5 7 9]


In [29]:
import pandas as pd
s1 = pd.Series([1,2,3,4,5],index = ['a','b','c','d','e'])
s2 = pd.Series([9, 8, 6, 5], index=['x','a','b','c'])

print(s1)
print(s2)

a    1
b    2
c    3
d    4
e    5
dtype: int64
x    9
a    8
b    6
c    5
dtype: int64


In [30]:
# Apply all Arithmetic Operations and Display the Results
print('\nAddition:\n',s1+s2)
print('\nSubtraction:\n', s1-s2)
print('\nMultiplication:\n', s1 * s2)
print('\nDivision:\n', s1/s2)


Addition:
 a    9.0
b    8.0
c    8.0
d    NaN
e    NaN
x    NaN
dtype: float64

Subtraction:
 a   -7.0
b   -4.0
c   -2.0
d    NaN
e    NaN
x    NaN
dtype: float64

Multiplication:
 a     8.0
b    12.0
c    15.0
d     NaN
e     NaN
x     NaN
dtype: float64

Division:
 a    0.125000
b    0.333333
c    0.600000
d         NaN
e         NaN
x         NaN
dtype: float64


In [13]:
# Creating a Series object from a dict: the keys become the index labels.
# The mapping gets a descriptive name instead of `dict`, which would shadow the
# built-in type for every later cell run in the same kernel.
students = {'Stud1':'Kannika', 'Stud2':'Yadhu','Stud3':'Shawn'}
print(students)

s1=pd.Series(students)
print(s1)

{'Stud1': 'Kannika', 'Stud2': 'Yadhu', 'Stud3': 'Shawn'}
Stud1    Kannika
Stud2      Yadhu
Stud3      Shawn
dtype: object


In [17]:
#creating series obj using list
l=['Banana','Apple','Orange']

s3=pd.Series(data=l)
print(l)
print(s3)
print(s3.dtype)

['Banana', 'Apple', 'Orange']
0    Banana
1     Apple
2    Orange
dtype: object
object


# PANDAS DataFrame

![image.png](attachment:image.png)

In [46]:
import pandas as pd

# Data represented as a dictionary
data = {
    'Name': ['Steve', 'Lia', 'Vin', 'Katie'],
    'Age': [32, 28, 45, 38],
    'Gender': ['Male', 'Female', 'Male', 'Female'],
    'Rating': [3.45, 4.6, 3.9, 2.78]
}

# Creating the DataFrame
df = pd.DataFrame(data)

# Display the DataFrame
print(df)

    Name  Age  Gender  Rating
0  Steve   32    Male    3.45
1    Lia   28  Female    4.60
2    Vin   45    Male    3.90
3  Katie   38  Female    2.78


![image.png](attachment:image.png)

![image.png](attachment:image.png)

In [50]:
import pandas as pd  

df = pd.DataFrame({
    "Name": ["Alice", "Bob", "Charlie"],
    "Age": [25, 30, 35],
    "Salary": [50000.5, 60000.75, 70000.0]
})

print(df.dtypes)  


Name       object
Age         int64
Salary    float64
dtype: object


![image.png](attachment:image.png)

In [51]:
df["Department"] = ["HR", "IT", "Finance"]  
print(df)


      Name  Age    Salary Department
0    Alice   25  50000.50         HR
1      Bob   30  60000.75         IT
2  Charlie   35  70000.00    Finance


In [52]:
df.drop(columns=["Salary"], inplace=True)
print(df)

      Name  Age Department
0    Alice   25         HR
1      Bob   30         IT
2  Charlie   35    Finance


In [54]:
new_row = {"Name": "David", "Age": 28, "Salary": 55000.0}
# NOTE: DataFrame.append is deprecated (this call emitted the warning shown in the
# output) and was removed in pandas 2.0; prefer pd.concat, as the next cell does.
df = df.append(new_row, ignore_index=True)  # Appends a new row
print(df)


      Name  Age Department   Salary
0    Alice   25         HR      NaN
1      Bob   30         IT      NaN
2  Charlie   35    Finance      NaN
3    David   28        NaN  55000.0


  df = df.append(new_row, ignore_index=True)  # Appends a new row


In [56]:
# New row as a single-row DataFrame
new_row = pd.DataFrame([{"Name": "David", "Age": 28, "Salary": 55000.0}])

# Use concat() instead of append()
# NOTE: df is rebound to the concatenated result, so every re-run of this cell
# adds another copy of the row (hence the repeated David rows in the output).
df = pd.concat([df, new_row], ignore_index=True)
print(df)

      Name  Age Department   Salary
0    Alice   25         HR      NaN
1      Bob   30         IT      NaN
2  Charlie   35    Finance      NaN
3    David   28        NaN  55000.0
4    David   28        NaN  55000.0
5    David   28        NaN  55000.0


![image.png](attachment:image.png)

In [58]:
df.at[1, "Age"] = 32  # Updates Age at index 1
df.loc[2, "Name"] = "Chris"  # Updates Name at index 2
print(df)

    Name  Age Department   Salary
0  Alice   25         HR      NaN
1    Bob   32         IT      NaN
2  Chris   35    Finance      NaN
3  David   28        NaN  55000.0
4  David   28        NaN  55000.0
5  David   28        NaN  55000.0


In [59]:
# Change Age for all people older than 30
df.loc[df["Age"] > 30, "Age"] = 29  
print(df)


    Name  Age Department   Salary
0  Alice   25         HR      NaN
1    Bob   29         IT      NaN
2  Chris   29    Finance      NaN
3  David   28        NaN  55000.0
4  David   28        NaN  55000.0
5  David   28        NaN  55000.0


![image.png](attachment:image.png)

![image.png](attachment:image.png)

# Series and DataFrame Attributes

In [None]:
#1️⃣ dtype – Returns the data type of elements

In [60]:
import pandas as pd

s = pd.Series([1, 2, 3])
print(s.dtype)  # Output: int64


int64


In [66]:
df = pd.DataFrame({"Name": ["Alice", "Bob"], "Age": [25, 30]})
print(df.dtypes)


Name    object
Age      int64
dtype: object


In [None]:
#2️⃣ index – Provides row labels

In [62]:
s = pd.Series([10, 20, 30], index=["a", "b", "c"])
print(s.index)


Index(['a', 'b', 'c'], dtype='object')


In [64]:
df = pd.DataFrame({"Name": ["Alice", "Bob"], "Age": [25, 30]})
print(df.index)#Remember: Row names, not column names


RangeIndex(start=0, stop=2, step=1)


In [None]:
#3️⃣ values – Returns values as a NumPy array

In [65]:
s = pd.Series([10, 20, 30])
print(s.values)  # Output: [10 20 30]


[10 20 30]


In [68]:
df = pd.DataFrame({"Name": ["Alice", "Bob"], "Age": [25, 30]})
print(df)
print(df.values)


    Name  Age
0  Alice   25
1    Bob   30
[['Alice' 25]
 ['Bob' 30]]


In [69]:
#4️⃣ shape – Gives (rows, columns)

In [80]:
df = pd.DataFrame({"Name": ["Alice", "Bob"], "Age": [25, 30],
                   "Dept": ['IT', 'HR']})
print('df.shape is : ',df.shape)  # Output: (2, 3) (Rows, Cols)

#print(df)
print('df.size is : ',df.size)

df.shape is :  (2, 3)
df.size is :  6


In [None]:
#5️⃣ ndim – Number of dimensions

In [73]:
s = pd.Series([10, 20, 30])
print(s.ndim)  # Output: 1, because a Series is one-dimensional


1


In [74]:
df = pd.DataFrame({"Name": ["Alice", "Bob"], "Age": [25, 30]})
print(df.ndim)  # Output: 2, because a DataFrame is two-dimensional (rows x columns)


2


In [None]:
#6️⃣ size – Total elements

In [75]:
s = pd.Series([10, 20, 30])
print(s.size)  # Output: 3


3


In [76]:
df = pd.DataFrame({"Name": ["Alice", "Bob"], "Age": [25, 30]})
print(df.size)  # Output: 4 (2 rows × 2 columns)


4


In [None]:
#7️⃣ empty – Checks if DataFrame/Series is empty

In [81]:
# Pass dtype explicitly, as the earlier empty-Series cell does: a bare
# pd.Series([]) emits a warning about the default dtype on some pandas versions.
s = pd.Series([], dtype=object)
print(s.empty)  # Output: True


True


  s = pd.Series([])


In [82]:
df = pd.DataFrame()
print(df.empty)  # Output: True


True


In [83]:
#8️⃣ columns – Column labels

In [84]:
df = pd.DataFrame({"Name": ["Alice", "Bob"], "Age": [25, 30]})
print(df.columns)


Index(['Name', 'Age'], dtype='object')


# Series and DataFrame Methods

In [89]:
import pandas as pd
import numpy as np

# Create a Series of 10 standard-normal random numbers.
# A seeded generator makes the values identical on every run, so the printed
# output stays in sync with the code after a kernel restart.
rng = np.random.default_rng(42)
s = pd.Series(rng.standard_normal(10))

print("Series:")
print(s)

# Using basic methods
print("\nFirst 5 elements of the Series:\n", s.head())  # head() defaults to 5 rows
print("\nLast 3 elements of the Series:\n", s.tail(3))
print("\nDescriptive statistics of the Series:\n", s.describe())

Series:
0    0.928368
1    0.722341
2    1.202860
3    1.005989
4   -1.082005
5   -1.420753
6    0.698111
7   -0.187108
8   -0.148842
9   -1.636953
dtype: float64

First 5 elements of the Series:
 0    0.928368
1    0.722341
2    1.202860
3    1.005989
4   -1.082005
dtype: float64

Last 3 elements of the Series:
 7   -0.187108
8   -0.148842
9   -1.636953
dtype: float64

Descriptive statistics of the Series:
 count    10.000000
mean      0.008201
std       1.067462
min      -1.636953
25%      -0.858281
50%       0.274635
75%       0.876861
max       1.202860
dtype: float64


In [90]:
import pandas as pd
import numpy as np

#Create a Dictionary of series
data = {'Name':pd.Series(['Tom','James','Ricky','Vin','Steve','Smith','Jack']),
   'Age':pd.Series([25,26,25,23,30,29,23]), 
   'Rating':pd.Series([4.23,3.24,3.98,2.56,3.20,4.6,3.8])}
 
#Create a DataFrame
df = pd.DataFrame(data)
print("Our data frame is:\n")
print(df)

# Using basic methods
print("\nFirst 5 rows of the DataFrame:\n", df.head())
print("\nLast 3 rows of the DataFrame:\n", df.tail(3))
print("\nInfo of the DataFrame:")
df.info()
print("\nDescriptive statistics of the DataFrame:\n", df.describe())

Our data frame is:

    Name  Age  Rating
0    Tom   25    4.23
1  James   26    3.24
2  Ricky   25    3.98
3    Vin   23    2.56
4  Steve   30    3.20
5  Smith   29    4.60
6   Jack   23    3.80

First 5 rows of the DataFrame:
     Name  Age  Rating
0    Tom   25    4.23
1  James   26    3.24
2  Ricky   25    3.98
3    Vin   23    2.56
4  Steve   30    3.20

Last 3 rows of the DataFrame:
     Name  Age  Rating
4  Steve   30     3.2
5  Smith   29     4.6
6   Jack   23     3.8

Info of the DataFrame:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7 entries, 0 to 6
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Name    7 non-null      object 
 1   Age     7 non-null      int64  
 2   Rating  7 non-null      float64
dtypes: float64(1), int64(1), object(1)
memory usage: 296.0+ bytes

Descriptive statistics of the DataFrame:
              Age    Rating
count   7.000000  7.000000
mean   25.857143  3.658571
std     2.734262  0

# Indexing and Selecting Data

![image.png](attachment:image.png)

In [128]:
import pandas as pd
import numpy as np

# Generate an 8x4 frame of standard-normal random numbers with letter row labels.
# Seeding the generator keeps the numbers stable across re-runs, so every cell
# that later displays part of df shows the same values.
rng = np.random.default_rng(0)
df = pd.DataFrame(
    rng.standard_normal((8, 4)),
    index=['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'],
    columns=['A', 'B', 'C', 'D'],
)

In [129]:
df

Unnamed: 0,A,B,C,D
a,-0.340562,0.561394,-0.770437,-1.164607
b,0.678444,-0.438576,1.097263,-0.998957
c,0.843044,-1.327232,1.745812,0.538836
d,0.060279,0.713107,0.659845,-0.136736
e,-0.547984,0.912136,-1.300069,0.020804
f,0.166765,0.761448,-1.066903,0.850627
g,-0.773526,0.691266,-1.124756,0.519895
h,-0.943558,1.406458,-0.967562,0.95605


In [108]:
print(df['C'])

a   -0.912201
b   -0.442251
c    1.596141
d    0.878075
e   -0.676119
f    1.553651
g   -0.730460
h   -1.121682
Name: C, dtype: float64


In [106]:
print(df.loc['a'])

A    0.171521
B    0.354774
C   -0.912201
D    1.346789
Name: a, dtype: float64


df.loc[row_label, column_label]
![image.png](attachment:image.png)

In [130]:
print(df.loc[:,'C']) #Same as print(df['C'])

a   -0.770437
b    1.097263
c    1.745812
d    0.659845
e   -1.300069
f   -1.066903
g   -1.124756
h   -0.967562
Name: C, dtype: float64


In [131]:
df.loc['a':'d', ['A', 'C']]  # Selects rows 'a' through 'd' (end label included) and columns A & C.
# Plain df[...] indexing cannot select rows and columns together in one step like this.


Unnamed: 0,A,C
a,-0.340562,-0.770437
b,0.678444,1.097263
c,0.843044,1.745812
d,0.060279,0.659845


In [118]:
df = pd.DataFrame({'A': [10, 20, 30]}, index=[1, 2, 3])
print(df)

    A
1  10
2  20
3  30


In [None]:
# df[1] looks for a *column* labelled 1. This frame only has column 'A'
# (1 is a row label), so pandas raises KeyError; catch it so the
# demonstration does not stop a top-to-bottom run of the notebook.
try:
    print(df[1])
except KeyError as err:
    print("KeyError:", err)

In [116]:
print(df.loc[1])  # CORRECT! Selects row with index label 1


A    10
Name: 1, dtype: int64


In [123]:
print(df.loc[1,'A'])  # CORRECT! Selects the single value at row label 1, column 'A'


10


In [132]:
# NOTE(review): this expects the 8x4 letter-indexed frame. `df` was rebound to a
# one-column, integer-indexed frame a few cells above, so on a top-to-bottom run
# these labels will not exist; re-create the 8x4 frame before running this cell.
print(df.loc[['a','c'], ['A','C']])

          A         C
a -0.340562 -0.770437
c  0.843044  1.745812


![image.png](attachment:image.png)

In [135]:
df

Unnamed: 0,A,B,C,D
a,-0.340562,0.561394,-0.770437,-1.164607
b,0.678444,-0.438576,1.097263,-0.998957
c,0.843044,-1.327232,1.745812,0.538836
d,0.060279,0.713107,0.659845,-0.136736
e,-0.547984,0.912136,-1.300069,0.020804
f,0.166765,0.761448,-1.066903,0.850627
g,-0.773526,0.691266,-1.124756,0.519895
h,-0.943558,1.406458,-0.967562,0.95605


In [136]:
df.iloc[1:3,0]

b    0.678444
c    0.843044
Name: A, dtype: float64

In [137]:
import pandas as pd
import numpy as np

# A scalar value is repeated for every label in the index.
s=pd.Series(5,index=[0,1,2,3])

In [138]:
s

0    5
1    5
2    5
3    5
dtype: int64

In [31]:
# Creating a DataFrame from a flat list: each element becomes one row of a
# single column that gets the default label 0.
l=[10,20,30,40,50]

df1=pd.DataFrame(l)
print(df1)

    0
0  10
1  20
2  30
3  40
4  50


In [32]:
# Creating a DataFrame from a list of lists: each inner list becomes one row,
# and the columns get the default labels 0 and 1.
l=[['a',10],['b',20],['c',30],['d',40],['d',50]]

df1=pd.DataFrame(l)
print(df1)

   0   1
0  a  10
1  b  20
2  c  30
3  d  40
4  d  50


In [1]:
# Accessing DataFrame row labels

import pandas as pd

# Create a DataFrame whose row labels r1..r4 are supplied through `index`
df = pd.DataFrame({
    'Name': ['Steve', 'Lia', 'Vin', 'Katie'],
    'Age': [32, 28, 45, 38],
    'Gender': ['Male', 'Female', 'Male', 'Female'],
    'Rating': [3.45, 4.6, 3.9, 2.78]},
    index=['r1', 'r2', 'r3', 'r4'])

In [2]:
df

Unnamed: 0,Name,Age,Gender,Rating
r1,Steve,32,Male,3.45
r2,Lia,28,Female,4.6
r3,Vin,45,Male,3.9
r4,Katie,38,Female,2.78


In [3]:
df.index

Index(['r1', 'r2', 'r3', 'r4'], dtype='object')

In [6]:
df.columns

Index(['Name', 'Age', 'Gender', 'Rating'], dtype='object')

In [7]:
#modifying the row index
df.index=[100,200,300,400]
print(df.index)

Int64Index([100, 200, 300, 400], dtype='int64')


In [8]:
df

Unnamed: 0,Name,Age,Gender,Rating
100,Steve,32,Male,3.45
200,Lia,28,Female,4.6
300,Vin,45,Male,3.9
400,Katie,38,Female,2.78


### Slicing a DataFrame Object

![image.png](attachment:image.png)

.iloc[] Syntax:
    ![image.png](attachment:image.png)

In [None]:
# Note: with .iloc the end position of a slice is not included in the selection.

In [11]:
df

Unnamed: 0,Name,Age,Gender,Rating
100,Steve,32,Male,3.45
200,Lia,28,Female,4.6
300,Vin,45,Male,3.9
400,Katie,38,Female,2.78


In [12]:
df.iloc[0:1,0:2]  

Unnamed: 0,Name,Age
100,Steve,32


.loc[] Syntax:
    ![image.png](attachment:image.png)

In [None]:
# Note: with .loc the end label of a slice is included, because this is label-based selection.

In [13]:
# The row index was set to the integers 100, 200, 300, 400, so the slice bounds
# must be integers too (string bounds do not match an integer index).
# .loc slices are label-based and include both endpoints.
df.loc[100:300, 'Age':'Gender']

Unnamed: 0,Age,Gender
100,32,Male
200,28,Female
300,45,Male


In [14]:
import pandas as pd

data = {'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]}
df = pd.DataFrame(data)

print(df)

   A  B  C
0  1  4  7
1  2  5  8
2  3  6  9


In [15]:
# Slice a single column
col_A = df.iloc[:, 0] #every row, only 0th(A)column
print("Slicing a single column A using iloc[]:")
print(col_A)



Slicing a single column A using iloc[]:
0    1
1    2
2    3
Name: A, dtype: int64


In [16]:
# Slice multiple columns
cols_AB = df.iloc[:, 0:2]#every row, 0th and 1st(A and B)column
print("Slicing multiple columns A and B using iloc[]:")
print(cols_AB)

Slicing multiple columns A and B using iloc[]:
   A  B
0  1  4
1  2  5
2  3  6


In [20]:
#Modifying after slicing
import pandas as pd

# Four rows of letter pairs in two named columns.
rows = [['a', 'b'], ['c', 'd'], ['e', 'f'], ['g', 'h']]
df = pd.DataFrame(rows, columns=['col1', 'col2'])

# Snapshot before the change.
print("Original DataFrame:", df, sep='\n')

print('  ')
# Positional assignment: rows 1 and 2 of the first column receive new values.
df.iloc[slice(1, 3), 0] = ['x', 'y']

# The same frame after the in-place edit.
print('Modified DataFrame:', df, sep='\n')

Original DataFrame:
  col1 col2
0    a    b
1    c    d
2    e    f
3    g    h
  
Modified DataFrame:
  col1 col2
0    a    b
1    x    d
2    y    f
3    g    h


### Modifying DataFrame

In [21]:
#Renaming Column or Row Labels of a DataFrame
import pandas as pd

# Create a DataFrame
df = pd.DataFrame({'A': [1, 2, 3],'B': [4, 5, 6]})
print(df)

   A  B
0  1  4
1  2  5
2  3  6


In [22]:
# Renaming the columns. rename() returns a new DataFrame; df keeps its
# original column names unless the result is assigned back.
df.rename(columns={'A':'aaa', 'B':'bbb'})

Unnamed: 0,aaa,bbb
0,1,4
1,2,5
2,3,6


In [23]:
# Renaming the row labels. Only the labels listed in the mapping change
# (label 2 is left as is); the result is a new DataFrame.
df.rename(index={0:'Stud1', 1:'Stud2'})

Unnamed: 0,A,B
Stud1,1,4
Stud2,2,5
2,3,6


In [26]:
#Adding or Inserting Columns
df=pd.DataFrame({'A':[1, 2,3], 'B':[4,5,6]})

print(df)
print(' ')

df['D']=[7,8,9]

   A  B
0  1  4
1  2  5
2  3  6
 


In [27]:
df

Unnamed: 0,A,B,D
0,1,4,7
1,2,5,8
2,3,6,9


In [None]:
# Inserting a Column at a Specific Position
# insert() modifies df in place and raises ValueError when the column already
# exists, so guard the call to keep this cell safe to re-run.
if 'C' not in df.columns:
    df.insert(2, 'C', [10, 11, 12])

In [30]:
df

Unnamed: 0,A,B,C,D
0,1,4,10,7
1,2,5,11,8
2,3,6,12,9


In [32]:
#Replacing the Contents of a DataFrame

#Replacing a Columns Values
import pandas as pd

# Create a DataFrame
df = pd.DataFrame({'A': [1, 2, 3],'B': [4, 5, 6]})

# Replace the contents of column 'A' with new values
df['A'] = [10, 20, 30]

# Display updated DataFrame
print("DataFrame after replacing column 'A':")
print(df)


DataFrame after replacing column 'A':
    A  B
0  10  4
1  20  5
2  30  6


In [33]:
#Replace the content using the replace() method
import pandas as pd

# Create a DataFrame
df = pd.DataFrame({'A': [1, 2, 3],'B': [4, 5, 6]})

# Display the Input DataFrame
print("Original DataFrame:", df, sep='\n')

# Replace the contents: the dict maps column -> value to look for, so this
# turns 1 into 100 in column 'A' and 6 into 100 in column 'B'.
df.replace({'A': 1, 'B': 6}, 100, inplace=True)

# Display updated DataFrame (the message names both affected columns)
print("DataFrame after replacing 1 in column 'A' and 6 in column 'B' with 100:")
print(df)


# Note - The inplace=True parameter ensures that the DataFrame is
# modified directly rather than returning a new one.

Original DataFrame:
   A  B
0  1  4
1  2  5
2  3  6
DataFrame after replacing column 'A':
     A    B
0  100    4
1    2    5
2    3  100


In [37]:
#Deleting Columns - DataFrame.drop()
print(df)
df=df.drop(columns=['A']) #note: you have to re-assign it to the df
print(df)




     A    B
0  100    4
1    2    5
2    3  100
     B
0    4
1    5
2  100


In [41]:
#Deleting Columns - DataFrame.drop()
print(df)
df.drop(columns=['B'], inplace=True) 
print(df)

     B
0    4
1    5
2  100
Empty DataFrame
Columns: []
Index: [0, 1, 2]


In [42]:
df

0
1
2


### Removing Rows from a DataFrame

In [43]:
# Dropping rows using the drop() method - rows are identified by index label
# (with the default integer index the labels happen to equal the positions)
import pandas as pd

# Create a DataFrame
df = pd.DataFrame({'A': [1, 2, 3, 4, 5],'B': [4, 5, 6, 7, 8]})

# Display original DataFrame
print("Original DataFrame:")
print(df)

# Drop the row with index label 3 (drop() returns a new DataFrame; df is unchanged)
result = df.drop(3)

# Display the result
print("\nAfter dropping the row at index 3:")
print(result)

Original DataFrame:
   A  B
0  1  4
1  2  5
2  3  6
3  4  7
4  5  8

After dropping the row at index 5:
   A  B
0  1  4
1  2  5
2  3  6
4  5  8


In [54]:
df=df.rename(index={0:'a', 1:'b', 2:'c',3:'d'})
print(df)

   A  B
a  1  4
b  2  5
c  3  6
d  4  7
4  5  8


In [55]:
df.drop(['d']) # returns a new DataFrame without row 'd'; assign the result to keep it

Unnamed: 0,A,B
a,1,4
b,2,5
c,3,6
4,5,8


In [59]:
#Removing Rows Based on a Conditions
import pandas as pd

# Create a DataFrame
df = pd.DataFrame({'A': [1, 2, 3, 4, 5],'B': [4, 5, 6, 7, 8],
'C': [90, 0, 11, 12, 13]}, index=['r1', 'r2', 'r3', 'r4', 'r5'])

print(df)

    A  B   C
r1  1  4  90
r2  2  5   0
r3  3  6  11
r4  4  7  12
r5  5  8  13


In [61]:
#in this column - 
df['C']

r1    90
r2     0
r3    11
r4    12
r5    13
Name: C, dtype: int64

In [62]:
# Keep only the rows whose 'C' value is non-zero (boolean-mask selection).
nonzero_c = df['C'].ne(0)
result = df[nonzero_c]

In [63]:
print(result)

    A  B   C
r1  1  4  90
r3  3  6  11
r4  4  7  12
r5  5  8  13


In [64]:
#Remove Rows using Index Slicing
df.index[2:4]



Index(['r3', 'r4'], dtype='object')

In [66]:
result = df.drop(df.index[2:4])
print(result)

    A  B   C
r1  1  4  90
r2  2  5   0
r5  5  8  13


In [70]:
#Arithmetic Operations Between Two DataFrames
import pandas as pd

# Create two DataFrames
df1 = pd.DataFrame({'A': [1, 2, 3, 4], 'B': [5, 6, 7, 8]})
df2 = pd.DataFrame({'A': [10, 20, 30], 'B': [50, 60, 70]}, index=[1, 2, 3])

print(df1,'\n','\n',df2)

# Perform arithmetic operations
print("\nAddition:\n", df1 + df2)
print("\nSubtraction:\n", df1 - df2)
print("\nMultiplication:\n", df1 * df2)
print("\nDivision:\n", df1 / df2)

   A  B
0  1  5
1  2  6
2  3  7
3  4  8 
 
     A   B
1  10  50
2  20  60
3  30  70

Addition:
       A     B
0   NaN   NaN
1  12.0  56.0
2  23.0  67.0
3  34.0  78.0

Subtraction:
       A     B
0   NaN   NaN
1  -8.0 -44.0
2 -17.0 -53.0
3 -26.0 -62.0

Multiplication:
        A      B
0    NaN    NaN
1   20.0  300.0
2   60.0  420.0
3  120.0  560.0

Division:
           A         B
0       NaN       NaN
1  0.200000  0.120000
2  0.150000  0.116667
3  0.133333  0.114286


In [76]:
add_df = df1.add(df2)

In [77]:
print(add_df)

      A     B
0   NaN   NaN
1  12.0  56.0
2  23.0  67.0
3  34.0  78.0


In [78]:
add_df = df1.add(df2,fill_value=0)
print(add_df)

      A     B
0   1.0   5.0
1  12.0  56.0
2  23.0  67.0
3  34.0  78.0


In [80]:
import pandas as pd

# Import StringIO to load a file-like object for reading CSV
from io import StringIO

# Create string representing CSV data (a header line followed by four records)
data = """Name,Gender,Age
Braund,male,22
Cumings,female,38
Heikkinen,female,26
Futrelle,female,35"""

# Use StringIO to convert the string data into a file-like object
obj = StringIO(data)
print(obj)  # prints only the object's repr (type and memory address), not the CSV text
# read CSV into a Pandas DataFrame; the first line supplies the column names
df = pd.read_csv(obj)

print(df)

<_io.StringIO object at 0x000002DDDF3CACB0>
        Name  Gender  Age
0     Braund    male   22
1    Cumings  female   38
2  Heikkinen  female   26
3   Futrelle  female   35


### Reading a JSON File

In [85]:
import pandas as pd
from io import StringIO

# Create a string representing JSON data - list of dictionaries
data = """[
   {"Name": "Braund", "Gender": "Male", "Age": 30},
   {"Name": "Cumings", "Gender": "Female", "Age": 25},
   {"Name": "Heikkinen", "Gender": "Female", "Age": 35}
]"""

print(data)

obj = StringIO(data)
df=pd.read_json(obj)

[
   {"Name": "Braund", "Gender": "Male", "Age": 30},
   {"Name": "Cumings", "Gender": "Female", "Age": 25},
   {"Name": "Heikkinen", "Gender": "Female", "Age": 35}
]


In [86]:
df

Unnamed: 0,Name,Gender,Age
0,Braund,Male,30
1,Cumings,Female,25
2,Heikkinen,Female,35


In [87]:
df.to_json("json_file_sample.json", orient='records',lines=True)

### Reading an Excel File in Pandas

In [90]:
import pandas as pd

df=pd.read_excel('Source_File_Excel.xlsx', sheet_name='Sheet1')

In [91]:
df

Unnamed: 0,Order ID,Date,Product,Category,Quantity,Price,Total Sales
0,1001,2025-01-10,Laptop,Electronics,2,700,1400
1,1002,2025-01-11,Mouse,Accessories,5,20,100
2,1003,2025-01-12,Keyboard,Accessories,3,50,150
3,1004,2025-01-13,Monitor,Electronics,1,250,250
4,1005,2025-01-14,Chair,Furniture,4,100,400


In [92]:
#writing to excel
df=pd.DataFrame([[5,2],[10,4]],index=[1,2],columns=['Five','Two'])


In [94]:
df.to_excel('Numbers.xlsx')

![image.png](attachment:image.png)

### Writing Multiple DataFrames to Different Sheets in Excel

In [112]:
import pandas as pd

df1 = pd.DataFrame({'EmpID':['3730','3731','37312'],'EmpName':['Kannika','Yadhu','']})

In [113]:
df1

Unnamed: 0,EmpID,EmpName
0,3730,Kannika
1,3731,Yadhu
2,37312,


In [114]:
import pandas as pd

df2 = pd.DataFrame({'EmpID':['3730','3731','37312'],'DeptName':['DataServices','Analytics','HR']})

In [115]:
df2

Unnamed: 0,EmpID,DeptName
0,3730,DataServices
1,3731,Analytics
2,37312,HR


In [116]:
# One writer, two sheets: each to_excel call targets its own sheet of the same workbook.
# The row index is written as the first column of each sheet (it shows up as
# 'Unnamed: 0' when the sheet is read back later in the notebook).
with pd.ExcelWriter('OutputMultipleSheets.xlsx') as writer:
    df1.to_excel(writer, sheet_name='Emp_Details')
    df2.to_excel(writer, sheet_name='Department')

![image.png](attachment:image.png)

In [117]:
#Appending Data to an Existing Excel File
#Using ExcelWriter with mode='a'.


df3 = pd.DataFrame({'EmpID':['3734','3735'],'EmpName':['Monica','Rachel']})


In [118]:
df3

Unnamed: 0,EmpID,EmpName
0,3734,Monica
1,3735,Rachel


In [123]:
import pandas as pd
from openpyxl import load_workbook


# Append df3 below the existing rows of the 'Emp_Details' sheet.
# mode='a' opens the existing workbook and if_sheet_exists='overlay' writes into
# the existing sheet instead of replacing it.
with pd.ExcelWriter(
    'OutputMultipleSheets.xlsx',
    mode='a',
    if_sheet_exists='overlay',
    engine='openpyxl',
) as writer:
    sheet = writer.sheets['Emp_Details']
    df3.to_excel(
        writer,
        sheet_name='Emp_Details',
        # `index` must be a real bool: the string 'False' is truthy and would
        # write the index anyway. The sheet was created with its index column,
        # so keep the index here to stay aligned with the existing columns.
        index=True,
        header=False,
        # startrow is 0-based while max_row is 1-based, so this is the first
        # row after the data already in the sheet.
        startrow=sheet.max_row,
    )

In [126]:
df_result_view= pd.read_excel('OutputMultipleSheets.xlsx',sheet_name='Emp_Details')

In [127]:
df_result_view

Unnamed: 0.1,Unnamed: 0,EmpID,EmpName
0,0,3730,Kannika
1,1,3731,Yadhu
2,2,37312,
3,0,3734,Monica
4,1,3735,Rachel


### Reading html data 

In [132]:
import pandas as pd

# Read tables from a SQL tutorial page (this cell needs network access to fetch the URL)
url = "https://www.tutorialspoint.com/sql/sql-clone-tables.htm"
tables = pd.read_html(url)  # a list of DataFrames, as the printed output shows
print(tables,'\n  ')
# Access the first table from the URL
df = tables[0]

# Display the first rows of the resultant DataFrame
print( df.head())

[   ID      NAME  AGE    ADDRESS   SALARY
0   1    Ramesh   32  Ahmedabad   2000.0
1   2    Khilan   25      Delhi   1500.0
2   3   Kaushik   23       Kota   2000.0
3   4  Chaitali   25     Mumbai   6500.0
4   5    Hardik   27     Bhopal   8500.0
5   6     Komal   22  Hyderabad   4500.0
6   7     Muffy   24     Indore  10000.0,      Field           Type Null  Key  Default  Extra
0       ID        int(11)   NO  PRI      NaN    NaN
1     NAME    varchar(20)   NO  NaN      NaN    NaN
2      AGE        int(11)   NO  NaN      NaN    NaN
3  ADDRESS       char(25)  YES  NaN      NaN    NaN
4   SALARY  decimal(18,2)  YES  NaN      NaN    NaN,    ID      NAME  AGE    ADDRESS   SALARY
0   1    Ramesh   32  Ahmedabad   2000.0
1   2    Khilan   25      Delhi   1500.0
2   3   Kaushik   23       Kota   2000.0
3   4  Chaitali   25     Mumbai   6500.0
4   5    Hardik   27     Bhopal   8500.0
5   6     Komal   22  Hyderabad   4500.0
6   7     Muffy   24     Indore  10000.0] 
  
   ID      NAME  AGE    