**Creating Pandas Series**

In [None]:
import pandas as pd

# Labels and values are kept as parallel lists; note that every value,
# including the numeric-looking ones, is a string.
labels = ['Name', 'Age', 'Gender', 'Rating']
data = ['Steve', '35', 'Male', '3.5']

# Pair each value with its label to form the Series
series = pd.Series(data=data, index=labels)
print(series)

Name      Steve
Age          35
Gender     Male
Rating      3.5
dtype: object


**Creating a Simple DataFrame**

In [None]:
import pandas as pd

# Column-oriented input: each keyword names a column and holds that column's values
data = dict(
    Name=['Steve', 'Lia', 'Vin', 'Katie'],
    Age=[32, 28, 45, 38],
    Gender=['Male', 'Female', 'Male', 'Female'],
    Rating=[3.45, 4.6, 3.9, 2.78],
)

# Build the table from the column mapping
df = pd.DataFrame(data=data)

# Show the whole table
print(df)

    Name  Age  Gender  Rating
0  Steve   32    Male    3.45
1    Lia   28  Female    4.60
2    Vin   45    Male    3.90
3  Katie   38  Female    2.78


In [None]:
import pandas as pd

# Column mapping assembled from (column label, values) pairs
data = dict([
    ('Name', ['Steve', 'Lia', 'Vin', 'Katie']),
    ('Age', [32, 28, 45, 38]),
    ('Gender', ['Male', 'Female', 'Male', 'Female']),
    ('Rating', [3.45, 4.6, 3.9, 2.78]),
])

# Turn the mapping into a DataFrame
df = pd.DataFrame(data)

# Selecting a single column by its label gives back a Series
name_column = df['Name']
print(name_column)

0    Steve
1      Lia
2      Vin
3    Katie
Name: Name, dtype: object


**Pandas Series**

In [None]:
#import the pandas library and aliasing as pd
import pandas as pd
import numpy as np
# Wrap a NumPy array of letters in a Series, supplying explicit integer labels
data = np.array(list('abcd'))
s = pd.Series(data=data, index=[100, 101, 102, 103])
print("Output:\n", s)

Output:
 100    a
101    b
102    c
103    d
dtype: object


In [None]:
#import the pandas library and aliasing as pd
import pandas as pd
import numpy as np
# A dict maps straight onto a Series: keys become the index, values the data
data = dict(a=0., b=1., c=2.)
s = pd.Series(data=data)
print(s)

a    0.0
b    1.0
c    2.0
dtype: float64


**Series Slicing**

In [None]:
import pandas as pd
values = [1, 2, 3, 4, 5]
s = pd.Series(values, index=list('abcde'))

# First three elements (integer slice, taken by position)
head_three = s[:3]
print(head_three)

# Last three elements (negative start counts from the end)
tail_three = s[-3:]
print(tail_three)

# Label slice: both end points, 'a' and 'd', are part of the result
labelled_part = s['a':'d']
print(labelled_part)

a    1
b    2
c    3
dtype: int64
c    3
d    4
e    5
dtype: int64
a    1
b    2
c    3
d    4
dtype: int64


**Modifying Values after Slicing**

In [None]:
import pandas as pd
s = pd.Series([1, 2, 3, 4, 5])

# Show the Series before any change
print("Original Series:\n", s)

# Overwrite the values at the first two positions
s.iloc[:2] = [100, 200]

print("Series after modifying the first two elements:", s)

Original Series:
 0    1
1    2
2    3
3    4
4    5
dtype: int64
Series after modifying the first two elements: 0    100
1    200
2      3
3      4
4      5
dtype: int64


**Converting Series to other objects**

In [None]:
import pandas as pd

# Source Series for both conversions
s = pd.Series([1, 2, 3])

# Series -> plain Python list
result = s.tolist()
print("Output:", result)
print("Output Type:", type(result))

# Series -> NumPy array (rebinds `result`)
result = s.to_numpy()
print("Output:", result)
print("Output Type:", type(result))

Output: [1, 2, 3]
Output Type: <class 'list'>
Output: [1 2 3]
Output Type: <class 'numpy.ndarray'>


In [None]:
import pandas as pd

# Series whose index labels are the strings 'a', 'b', 'c'
s = pd.Series({'a': 1, 'b': 2, 'c': 3})

# Series -> dict keyed by index label
result = s.to_dict()

print("Output:", result)
print("Output Type:", type(result))

Output: {'a': 1, 'b': 2, 'c': 3}
Output Type: <class 'dict'>


**Arithmetic operations between two Series**

In [None]:
import pandas as pd

# s1 carries the labels a, b, c, d, e
s1 = pd.Series(dict(a=1, b=2, c=3, d=4, e=5))

# s2 carries the labels x, a, b, c
s2 = pd.Series(dict(x=9, a=8, b=6, c=5))

# Operands are matched on label; a label present in only one Series
# (d, e, x here) produces NaN in the result.
print('\nAddition:\n', s1.add(s2))
print('\nSubtraction:\n', s1.sub(s2))
print('\nMultiplication:\n', s1.mul(s2))
print('\nDivision:\n', s1.div(s2))


Addition:
 a    9.0
b    8.0
c    8.0
d    NaN
e    NaN
x    NaN
dtype: float64

Subtraction:
 a   -7.0
b   -4.0
c   -2.0
d    NaN
e    NaN
x    NaN
dtype: float64

Multiplication:
 a     8.0
b    12.0
c    15.0
d     NaN
e     NaN
x     NaN
dtype: float64

Division:
 a    0.125000
b    0.333333
c    0.600000
d         NaN
e         NaN
x         NaN
dtype: float64


**Creating Pandas DataFrame**

In [None]:
import pandas as pd
# Row-oriented input: each inner list is one [name, age] record
data = [['Alex', 10], ['Bob', 12], ['Clarke', 13]]
column_names = ['Name', 'Age']
df = pd.DataFrame(data=data, columns=column_names)
print(df)

     Name  Age
0    Alex   10
1     Bob   12
2  Clarke   13


In [None]:
import pandas as pd
# Column label -> list of that column's values
data = dict(
    Name=['Tom', 'Jack', 'Steve', 'Ricky'],
    Age=[28, 34, 29, 42],
)
df = pd.DataFrame(data=data)
print(df)

    Name  Age
0    Tom   28
1   Jack   34
2  Steve   29
3  Ricky   42


In [None]:
import pandas as pd

# Two Series of different lengths; 'one' has no entry for label 'd'
one = pd.Series([1, 2, 3], index=list('abc'))
two = pd.Series([1, 2, 3, 4], index=list('abcd'))
d = {'one': one, 'two': two}

# The rows cover every label found in either Series; the missing
# 'one' value for label 'd' shows up as NaN.
df = pd.DataFrame(d)
print(df)

   one  two
a  1.0    1
b  2.0    2
c  3.0    3
d  NaN    4


**Label-Based Indexing with .loc**

In [None]:
import pandas as pd
import numpy as np

# Seeded generator so the random values (and therefore the printed output)
# are the same on every run of this cell.
rng = np.random.default_rng(42)

# 8 rows x 4 columns of standard-normal samples,
# rows labelled 'a'..'h' and columns labelled 'A'..'D'.
df = pd.DataFrame(rng.standard_normal((8, 4)),
                  index=['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'],
                  columns=['A', 'B', 'C', 'D'])

# Display the original DataFrame
print("Original DataFrame:\n", df)

# .loc is label-based: ':' keeps every row, 'A' picks the column
print('\nResult:\n', df.loc[:, 'A'])


Original DataFrame:
           A         B         C         D
a -0.090608  0.072678  1.506535 -0.993647
b  0.758503  0.070799 -0.094593 -0.872920
c -0.216746  0.394235  2.939156 -1.737788
d  1.233330 -1.036672  0.583306  0.519301
e -1.705275  0.155426  1.431306 -0.014671
f  2.205250  0.463624 -1.417899 -1.536805
g -1.127973 -1.531993 -0.731792 -0.434562
h -1.228793  1.231154 -0.567553 -0.134365

Result:
 a   -0.090608
b    0.758503
c   -0.216746
d    1.233330
e   -1.705275
f    2.205250
g   -1.127973
h   -1.228793
Name: A, dtype: float64


In [None]:
import pandas as pd
import numpy as np

# Seeded generator so the random values are the same on every run of this cell
rng = np.random.default_rng(42)

# 8 x 4 standard-normal samples with row labels 'a'..'h' and column labels 'A'..'D'
df = pd.DataFrame(rng.standard_normal((8, 4)),
                  index=['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'],
                  columns=['A', 'B', 'C', 'D'])

# Lists of labels pick several rows and several columns in one .loc call
print(df.loc[['a', 'b', 'f', 'h'], ['A', 'C']])

          A         C
a -0.743335  0.358730
b  0.902213 -0.447809
f -0.253850 -1.795058
h -0.597445  0.137873


**Integer Position-Based Indexing with .iloc**

In [None]:
import pandas as pd
import numpy as np

# Seeded generator so the random values are the same on every run of this cell
rng = np.random.default_rng(42)

# 8 x 4 standard-normal samples; rows keep the default labels 0..7
df = pd.DataFrame(rng.standard_normal((8, 4)), columns=['A', 'B', 'C', 'D'])

print("Original DataFrame:\n", df)

# A single slice given to .iloc selects rows by position:
# here the first four rows, with every column kept.
print('\nResult:\n', df.iloc[:4])

Original DataFrame:
           A         B         C         D
0  0.095874 -0.993600 -0.690025  1.403570
1 -1.071942  0.630795 -2.255658 -0.782670
2  1.189937 -0.103614 -0.289287 -0.203541
3 -0.783170 -0.172301  0.521611 -0.535989
4 -1.442989  1.296939 -1.225757  1.994669
5 -0.941695 -0.490993 -0.620028 -0.977928
6 -1.036550  0.700464 -0.820962  0.183185
7  0.366967  0.696571  0.256713 -0.893327

Result:
           A         B         C         D
0  0.095874 -0.993600 -0.690025  1.403570
1 -1.071942  0.630795 -2.255658 -0.782670
2  1.189937 -0.103614 -0.289287 -0.203541
3 -0.783170 -0.172301  0.521611 -0.535989


In [None]:
import pandas as pd
import numpy as np

# Seeded generator so the random values are the same on every run of this cell
rng = np.random.default_rng(42)

# 8 x 4 standard-normal samples; rows keep the default labels 0..7
df = pd.DataFrame(rng.standard_normal((8, 4)), columns=['A', 'B', 'C', 'D'])

# Integer slices: rows at positions 1-4 and columns at positions 2-3
# (the stop position of each slice is excluded)
print(df.iloc[1:5, 2:4])

# Integer lists: exactly the listed row and column positions
print(df.iloc[[1, 3, 5], [1, 3]])

          C         D
1  0.491485 -0.656520
2  0.115554  0.593020
3  0.513944 -1.046037
4 -1.614940  0.163669
          B         D
1 -0.560321 -0.656520
3 -0.466191 -1.046037
5 -1.172950  0.355877


In [None]:
import pandas as pd

data = dict(A=[1, 2, 3], B=[4, 5, 6], C=[7, 8, 9])
df = pd.DataFrame(data=data)

# Every row, column at position 0 -> comes back as a Series
col_A = df.iloc[:, 0]
print("Slicing a single column A using iloc[]:", col_A, sep='\n')

# Every row, columns at positions 0 and 1 -> comes back as a DataFrame
cols_AB = df.iloc[:, :2]
print("Slicing multiple columns A and B using iloc[]:", cols_AB, sep='\n')

Slicing a single column A using iloc[]:
0    1
1    2
2    3
Name: A, dtype: int64
Slicing multiple columns A and B using iloc[]:
   A  B
0  1  4
1  2  5
2  3  6


**Indexing with Brackets**

In [None]:
import pandas as pd
import numpy as np

# Seeded generator so the random values are the same on every run of this cell
rng = np.random.default_rng(42)

# 8 x 4 standard-normal samples; rows keep the default labels 0..7
df = pd.DataFrame(rng.standard_normal((8, 4)), columns=['A', 'B', 'C', 'D'])

# Accessing a single column: one label inside [] returns a Series
print(df['A'])

# Accessing multiple columns: a list of labels inside [] returns a DataFrame
print(df[['A', 'B']])

0   -2.376217
1   -0.914106
2    0.131745
3    0.399043
4    0.625874
5   -0.013148
6    1.739137
7    1.783914
Name: A, dtype: float64
          A         B
0 -2.376217  2.646317
1 -0.914106  2.124020
2  0.131745 -0.590083
3  0.399043 -0.367624
4  0.625874  0.329894
5 -0.013148  0.040176
6  1.739137  0.508661
7  1.783914 -0.556763


In [None]:
import pandas as pd
import numpy as np

# Seeded generator so the random values are the same on every run of this cell
rng = np.random.default_rng(42)

# 8 x 4 standard-normal samples; rows keep the default labels 0..7
df = pd.DataFrame(rng.standard_normal((8, 4)), columns=['A', 'B', 'C', 'D'])

# Accessing rows: a slice inside [] selects rows, here only the first one
print(df[:1])

          A         B         C         D
0  0.393932 -1.029468 -1.980494  0.427398


**Modifying Values After Slicing**

In [None]:
import pandas as pd

# Four rows and two columns of single letters
letters = [['a', 'b'], ['c', 'd'], ['e', 'f'], ['g', 'h']]
df = pd.DataFrame(data=letters, columns=['col1', 'col2'])

# Show the table before the change
print("Original DataFrame:")
print(df)

# Rows at positions 1 and 2 of the first column receive new values
df.iloc[1:3, 0] = ['x', 'y']

# Show the table after the change
print('Modified DataFrame:')
print(df)

Original DataFrame:
  col1 col2
0    a    b
1    c    d
2    e    f
3    g    h
Modified DataFrame:
  col1 col2
0    a    b
1    x    d
2    y    f
3    g    h


In [None]:
import pandas as pd

# Four rows and two columns of single letters
letters = [['a', 'b'], ['c', 'd'], ['e', 'f'], ['g', 'h']]
df = pd.DataFrame(data=letters, columns=['col1', 'col2'])

# Show the table before the change
print("Original DataFrame:")
print(df)

# Rows at positions 1-2 across both columns are overwritten by a 2x2 block
replacement = [['x', 'y'], ['z', 'A']]
df.iloc[1:3, :2] = replacement

# Show the table after the change
print('Modified DataFrame:')
print(df)

Original DataFrame:
  col1 col2
0    a    b
1    c    d
2    e    f
3    g    h
Modified DataFrame:
  col1 col2
0    a    b
1    x    y
2    z    A
3    g    h


**Renaming Column/Row Labels in a DataFrame**

In [None]:
import pandas as pd

# Starting table with columns 'A' and 'B'
df = pd.DataFrame(dict(A=[1, 2, 3], B=[4, 5, 6]))

# Column 'A' becomes 'aa'; rename() returns a new DataFrame
column_map = {'A': 'aa'}
df = df.rename(columns=column_map)
print("Modified DataFrame:", df, sep='\n')

# Row labels 0, 1, 2 become 'r1', 'r2', 'r3'
row_map = {0: 'r1', 1: 'r2', 2: 'r3'}
df = df.rename(index=row_map)
print("Modified DataFrame:", df, sep='\n')

Modified DataFrame:
   aa  B
0   1  4
1   2  5
2   3  6
Modified DataFrame:
    aa  B
r1   1  4
r2   2  5
r3   3  6


**Adding or inserting new columns**

In [None]:
import pandas as pd

# Starting table with columns 'A' and 'B'
df = pd.DataFrame(dict(A=[1, 2, 3], B=[4, 5, 6]))

# Assigning to a label that does not exist yet appends it as the last column
df['C'] = [7, 8, 9]
print("DataFrame after adding a new column 'C':", df, sep='\n')

# insert() modifies df directly, placing 'D' at column position 1
df.insert(loc=1, column='D', value=[10, 11, 12])
print("DataFrame after inserting column 'D' at position 1:", df, sep='\n')

DataFrame after adding a new column 'C':
   A  B  C
0  1  4  7
1  2  5  8
2  3  6  9
DataFrame after inserting column 'D' at position 1:
   A   D  B  C
0  1  10  4  7
1  2  11  5  8
2  3  12  6  9


**Replacing the Contents of a DataFrame**

In [None]:
import pandas as pd

# Create a DataFrame
df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'c': [7, 8, 9]})

# Replace the contents of column 'A' with new values
df['A'] = [10, 20, 30]

# Display updated DataFrame
print("DataFrame after replacing column 'A':")
print(df)

# Per-column value replacement, written as {column: {old_value: new_value}}.
# Column 'A' holds 10, 20, 30 at this point, so its {1: 100} entry matches
# nothing and 'A' is left unchanged; only 'B' and 'c' are affected.
# replace() returns a new DataFrame, so rebind `df` instead of using inplace=True.
df = df.replace({'A': {1: 100}, 'B': {6: 200}, 'c': {7: 1000}})

# Display updated DataFrame
print("DataFrame after replacement:")
print(df)

DataFrame after replacing column 'A':
    A  B  c
0  10  4  7
1  20  5  8
2  30  6  9
DataFrame after replacement:
    A    B     c
0  10    4  1000
1  20    5     8
2  30  200     9


**Deleting/Dropping Columns**

In [None]:
import pandas as pd

# Starting table with columns 'A', 'B' and 'C'
df = pd.DataFrame(dict(A=[1, 2, 3], B=[4, 5, 6], C=[7, 8, 9]))

# Show the table before anything is removed
print("Original DataFrame:")
print(df)

# drop() along axis 1 returns a copy without the listed columns
df = df.drop(['A', 'B'], axis=1)

# Show what is left
print("DataFrame after deleting columns 'A' and 'B':", df, sep='\n')

Original DataFrame:
   A  B  C
0  1  4  7
1  2  5  8
2  3  6  9
DataFrame after deleting columns 'A' and 'B':
   C
0  7
1  8
2  9


**Removing Rows from a DataFrame**

**Dropping DataFrame Rows by Index Values**

In [None]:
import pandas as pd

# Five rows with the default labels 0..4
df = pd.DataFrame(dict(A=[1, 2, 3, 4, 5], B=[4, 5, 6, 7, 8]))

# Show the table before anything is removed
print("Original DataFrame:", df, sep='\n')

# Remove the row labelled 3; df itself is left untouched
result = df.drop(index=3)

# Show the reduced copy
print("\nAfter dropping the row at index 3:", result, sep='\n')

Original DataFrame:
   A  B
0  1  4
1  2  5
2  3  6
3  4  7
4  5  8

After dropping the row at index 3:
   A  B
0  1  4
1  2  5
2  3  6
4  5  8


**Dropping Multiple Rows by Labels**

In [None]:
import pandas as pd

# Five rows labelled 'r1'..'r5'
row_labels = ['r1', 'r2', 'r3', 'r4', 'r5']
df = pd.DataFrame(
    dict(A=[1, 2, 3, 4, 5], B=[4, 5, 6, 7, 8], C=[9, 10, 11, 12, 13]),
    index=row_labels,
)

# Show the table before anything is removed
print("Original DataFrame:", df, sep='\n')

# A list of row labels removes all of them at once; df itself is left untouched
result = df.drop(index=['r1', 'r3'])

# Show the reduced copy
print("\nAfter dropping the rows:", result, sep='\n')

Original DataFrame:
    A  B   C
r1  1  4   9
r2  2  5  10
r3  3  6  11
r4  4  7  12
r5  5  8  13

After dropping the rows:
    A  B   C
r2  2  5  10
r4  4  7  12
r5  5  8  13


**Removing Rows Based on a Condition**

In [None]:
import pandas as pd
# Five rows labelled 'r1'..'r5'; column 'C' contains a single 0 (row 'r2')
row_labels = ['r1', 'r2', 'r3', 'r4', 'r5']
df = pd.DataFrame(
    dict(A=[1, 2, 3, 4, 5], B=[4, 5, 6, 7, 8], C=[90, 0, 11, 12, 13]),
    index=row_labels,
)

# Show the table before filtering
print("Original DataFrame:", df, sep='\n')

# Boolean mask: True for every row whose 'C' value is not 0
keep = df["C"].ne(0)
result = df[keep]

# Show the rows that passed the filter
print("\nAfter dropping the row where 'C' has 0:", result, sep='\n')

Original DataFrame:
    A  B   C
r1  1  4  90
r2  2  5   0
r3  3  6  11
r4  4  7  12
r5  5  8  13

After dropping the row where 'C' has 0:
    A  B   C
r1  1  4  90
r3  3  6  11
r4  4  7  12
r5  5  8  13


**Removing Rows using Index Slicing**

In [None]:
import pandas as pd

# Five rows with the default labels 0..4
df = pd.DataFrame(dict(A=[1, 2, 3, 4, 5], B=[4, 5, 6, 7, 8]))

# Show the table before anything is removed
print("Original DataFrame:", df, sep='\n')

# Slice the index by position to obtain the labels to remove (positions 2 and 3)
rows_to_drop = df.index[2:4]
result = df.drop(index=rows_to_drop)

# Show the reduced copy
print("\nAfter dropping the row at 2 and 3:", result, sep='\n')

Original DataFrame:
   A  B
0  1  4
1  2  5
2  3  6
3  4  7
4  5  8

After dropping the row at 2 and 3:
   A  B
0  1  4
1  2  5
4  5  8


**Arithmetic Operations Between Two DataFrames**

In [None]:
import pandas as pd

# df1 keeps the default row labels 0..3; df2 is explicitly labelled 1..3
df1 = pd.DataFrame(dict(A=[1, 2, 3, 4], B=[5, 6, 7, 8]))
df2 = pd.DataFrame(dict(A=[10, 20, 30], B=[50, 60, 70]), index=[1, 2, 3])

# Show both inputs
print("DataFrame 1:\n", df1)
print("\nDataFrame 2:\n", df2)

# Element-wise arithmetic on matching labels; row 0 exists only in df1,
# so every result has NaN in that row.
print("\nAddition of Two DataFrames:\n", df1.add(df2))
print("\nSubtraction of Two DataFrames:\n", df1.sub(df2))
print("\nMultiplication of Two DataFrames:\n", df1.mul(df2))
print("\nDivision of Two DataFrames:\n", df1.div(df2))

DataFrame 1:
    A  B
0  1  5
1  2  6
2  3  7
3  4  8

DataFrame 2:
     A   B
1  10  50
2  20  60
3  30  70

Addition of Two DataFrames:
       A     B
0   NaN   NaN
1  12.0  56.0
2  23.0  67.0
3  34.0  78.0

Subtraction of Two DataFrames:
       A     B
0   NaN   NaN
1  -8.0 -44.0
2 -17.0 -53.0
3 -26.0 -62.0

Multiplication of Two DataFrames:
        A      B
0    NaN    NaN
1   20.0  300.0
2   60.0  420.0
3  120.0  560.0

Division of Two DataFrames:
           A         B
0       NaN       NaN
1  0.200000  0.120000
2  0.150000  0.116667
3  0.133333  0.114286


In [None]:
import pandas as pd

# Both frames share the same row labels (0, 1) and columns ('A', 'B'),
# so every cell has a partner and the sum contains no NaN.
df1 = pd.DataFrame(dict(A=[10, 20], B=[30, 40]))
df2 = pd.DataFrame(dict(A=[1, 2], B=[3, 4]))
print(df1.add(df2))


    A   B
0  11  33
1  22  44


In [None]:
# Imported here so the cell also runs on its own in a fresh kernel
import pandas as pd

df1 = pd.DataFrame({'A': [10, 20], 'B': [30, 40]})
df2 = pd.DataFrame({'A': [1, 2]})  # 'B' is missing here

# Using +: column 'B' has no counterpart in df2, so the result is NaN there
print("Using +:\n", df1 + df2)
print("\n")

# Using .add with fill_value=0: the missing 'B' values are treated as 0,
# so column 'B' keeps df1's values
print("using .add:\n", df1.add(df2, fill_value=0))


Using +:
     A   B
0  11 NaN
1  22 NaN


using .add:
     A     B
0  11  30.0
1  22  40.0


In [None]:
import pandas as pd

df = pd.DataFrame({'A': [10, 20], 'B': [30, 40]})

# Series labelled like the DataFrame's rows (0, 1)
s = pd.Series([1, 2], index=[0, 1])

# axis=0 matches the Series index against the row labels, so s[0] is added
# to every value in row 0 and s[1] to every value in row 1.
# This must be requested explicitly: DataFrame.add defaults to axis='columns'.
print(df.add(s, axis=0))

# Series labelled like the DataFrame's columns ('A', 'B')
s2 = pd.Series([1, 100], index=['A', 'B'])

# axis=1 (the default) matches the Series index against the column labels,
# so 1 is added throughout column 'A' and 100 throughout column 'B'.
print(df.add(s2, axis=1))


    A   B
0  11  31
1  22  42
    A    B
0  11  130
1  21  140


In [None]:
# StringIO wraps a string in a file-like object that read_csv can consume;
# pandas is imported here so the cell also runs on its own in a fresh kernel
from io import StringIO

import pandas as pd

# CSV text: a header line followed by four records
data = """S.No,Name,Age,City,Salary
1,Tom,28,Toronto,20000
2,Lee,32,HongKong,3000
3,Steven,43,Bay Area,8300
4,Ram,38,Hyderabad,3900"""

# Use StringIO to convert the string data into a file-like object
obj = StringIO(data)

# Parse the CSV text into a DataFrame; the first line supplies the column names
df = pd.read_csv(obj)

print(df)

   S.No    Name  Age       City  Salary
0     1     Tom   28    Toronto   20000
1     2     Lee   32   HongKong    3000
2     3  Steven   43   Bay Area    8300
3     4     Ram   38  Hyderabad    3900


In [None]:
import pandas as pd

# Dictionary of lists: column label -> that column's values
d = {'Car': ['BMW', 'Lexus', 'Audi', 'Mercedes', 'Jaguar', 'Bentley'],
     'Date_of_purchase': ['2024-10-10', '2024-10-12', '2024-10-17', '2024-10-16', '2024-10-19', '2024-10-22']}

# Create the DataFrame from the dictionary of lists
dataFrame = pd.DataFrame(d)
print("Original DataFrame:\n", dataFrame)

# Write the DataFrame to Output_written_CSV_File.csv (a relative path).
# With no index argument, to_csv() also writes the row labels as the first column.
dataFrame.to_csv("Output_written_CSV_File.csv")

# Report completion; the file's contents are not printed here
print("The output csv file written successfully...")

Original DataFrame:
         Car Date_of_purchase
0       BMW       2024-10-10
1     Lexus       2024-10-12
2      Audi       2024-10-17
3  Mercedes       2024-10-16
4    Jaguar       2024-10-19
5   Bentley       2024-10-22
The output csv file written successfully...
