In [1]:
import pandas as pd

In [2]:
# merge function will combine two Dataframes based on common column or index.
df1 = pd.DataFrame({
    'ID' : [1, 2, 3],
    'Name' : ['Alice', 'Bob', 'Charlie']
})

df2 = pd.DataFrame({
    'ID' : [2, 3, 4],
    'Age' : [24, 25, 26]
})

# inner will take only those data that are in both the dataframes.
result = pd.merge(df1, df2, on='ID', how='inner')
result

Unnamed: 0,ID,Name,Age
0,2,Bob,24
1,3,Charlie,25


In [3]:
df1 = pd.DataFrame({
    'ID' : [1, 2, 3],
    'Name' : ['Alice', 'Bob', 'Charlie']
})

df2 = pd.DataFrame({
    'ID' : [2, 3, 4],
    'Age' : [24, 25, 26]
})

# outer will take data from both the dataframe and will print NaN in those where it is empty.
result = pd.merge(df1, df2, on='ID', how='outer')
result

Unnamed: 0,ID,Name,Age
0,1,Alice,
1,2,Bob,24.0
2,3,Charlie,25.0
3,4,,26.0


In [None]:
df1 = pd.DataFrame({
    'ID' : [1, 2, 3],
    'Name' : ['Alice', 'Bob', 'Charlie']
})

df2 = pd.DataFrame({
    'ID' : [2, 3, 4],
    'Age' : [24, 25, 26]
})

# left will take all the data from the first dataframe and fill NaN where data for the second dataframe is not available.
result = pd.merge(df1, df2, on='ID', how='left')
result

Unnamed: 0,ID,Name,Age
0,1,Alice,
1,2,Bob,24.0
2,3,Charlie,25.0


In [14]:
df1 = pd.DataFrame({
    'ID' : [1, 2, 3],
    'Name' : ['Alice', 'Bob', 'Charlie']
})

df2 = pd.DataFrame({
    'ID' : [2, 3, 4],
    'Age' : [24, 25, 26]
})

# right will take all the data from the second dataframe and fill NaN where data for the first dataframe is not available.
result = pd.merge(df1, df2, on='ID', how='right')
result

Unnamed: 0,ID,Name,Age
0,2,Bob,24
1,3,Charlie,25
2,4,,26


In [18]:
# join function is used to merge the dataframes based on index.
df1 = pd.DataFrame({
    'ID' : [1, 2, 3],
    'Name' : ['Alice', 'Bob', 'Charlie']
}).set_index('ID')

df2 = pd.DataFrame({
    'Age' : [24, 25, 26]
}, index = [2, 3, 4])

result = df1.join(df2, how='inner')
result

Unnamed: 0,Name,Age
2,Bob,24
3,Charlie,25


In [23]:
df1 = pd.DataFrame({
    'ID' : [1, 2, 3],
    'Name' : ['Alice', 'Bob', 'Charlie']
})

df2 = pd.DataFrame({
    'ID' : [4, 5, 6],
    'Name' : ['David', 'Eva', 'Frank']
})

result = pd.concat([df1, df2], axis=0, ignore_index=True)
result

Unnamed: 0,ID,Name
0,1,Alice
1,2,Bob
2,3,Charlie
3,4,David
4,5,Eva
5,6,Frank


In [24]:
df1 = pd.DataFrame({
    'ID' : [1, 2, 3],
    'Name' : ['Alice', 'Bob', 'Charlie']
})

df2 = pd.DataFrame({
    'ID' : [4, 5, 6],
    'Name' : ['David', 'Eva', 'Frank']
})

result = pd.concat([df1, df2], axis=1, ignore_index=True)
result

Unnamed: 0,0,1,2,3
0,1,Alice,4,David
1,2,Bob,5,Eva
2,3,Charlie,6,Frank


In [36]:
# pivot is used to convert dataframe into a table
# Here, the date becomes the index, city becomes columns and Temperature becomes the values
df = pd.DataFrame({
    'Date' : ['2025-02-01', '2025-02-01', '2025-02-02', '2025-02-02'],
    'City' : ['Ahmedabad', 'Surat', 'Ahmedabad', 'Surat'],
    'Temperature' : [16, 48, 15, 45]
})

pivot_df = df.pivot(index='Date', columns='City', values='Temperature')
pivot_df

City,Ahmedabad,Surat
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2025-02-01,16,48
2025-02-02,15,45


In [None]:
# pivot_table function is similar to pivot, but it also allows aggregation functions to be performed on the values
df = pd.DataFrame({
    'Date': ['2025-02-01', '2025-02-01', '2025-02-02', '2025-02-02'],
    'City': ['NY', 'NY', 'LA', 'LA'],
    'Temperature': [30, 28, 77, 80],
    'Humidity': [65, 70, 50, 45]
})

pivot_table_df = df.pivot_table(index='Date', columns='City', values=['Temperature', 'Humidity'], aggfunc='mean')
pivot_table_df

Unnamed: 0_level_0,Humidity,Humidity,Temperature,Temperature
City,LA,NY,LA,NY
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
2025-02-01,,67.5,,29.0
2025-02-02,47.5,,78.5,


In [48]:
df = pd.DataFrame({
    'City': ['NY', 'LA'],
    'Date': ['2025-02-01', '2025-02-01'],
    'Temperature': [30, 75],
    'Humidity': [65, 50]
})

melted_df = pd.melt(df, id_vars=['City'], value_vars=None, var_name='Variable', value_name='Value')
melted_df

Unnamed: 0,City,Variable,Value
0,NY,Date,2025-02-01
1,LA,Date,2025-02-01
2,NY,Temperature,30
3,LA,Temperature,75
4,NY,Humidity,65
5,LA,Humidity,50


In [51]:
df = pd.DataFrame({
    'City': ['NY', 'LA'],
    'Date': ['2025-02-01', '2025-02-01'],
    'Temperature': [30, 75],
    'Humidity': [65, 50]
})

melted_df = pd.melt(df, id_vars=['City', 'Date'], value_vars=['Temperature', 'Humidity'], var_name='Variable', value_name='Value')
melted_df

Unnamed: 0,City,Date,Variable,Value
0,NY,2025-02-01,Temperature,30
1,LA,2025-02-01,Temperature,75
2,NY,2025-02-01,Humidity,65
3,LA,2025-02-01,Humidity,50


In [11]:
import timeit

def test_function() :
    return [i for i in range(1000)]


execution_time = timeit.timeit(test_function, number=1000)
execution_time

0.0339960000001156

In [17]:
import cProfile

def slow_function() :
    total = 0
    for i in range(100000):
        total += i
    return total

def fast_function() :
    return sum(range(100000))

cProfile.run('slow_function()')

cProfile.run('fast_function()')

         4 function calls in 0.009 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.009    0.009    0.009    0.009 1624640473.py:3(slow_function)
        1    0.000    0.000    0.009    0.009 <string>:1(<module>)
        1    0.000    0.000    0.009    0.009 {built-in method builtins.exec}
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}


         5 function calls in 0.002 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    0.002    0.002 1624640473.py:9(fast_function)
        1    0.000    0.000    0.002    0.002 <string>:1(<module>)
        1    0.000    0.000    0.002    0.002 {built-in method builtins.exec}
        1    0.002    0.002    0.002    0.002 {built-in method builtins.sum}
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' obje

In [18]:
from line_profiler import LineProfiler

profiler = LineProfiler()

profiler.add_function(slow_function)

profiler.run('slow_function()')

profiler.print_stats()

Timer unit: 1e-07 s

Total time: 0.0813054 s
File: C:\Users\Admin-\AppData\Local\Temp\ipykernel_4972\1624640473.py
Function: slow_function at line 3

Line #      Hits         Time  Per Hit   % Time  Line Contents
     3                                           def slow_function() :
     4         1          8.0      8.0      0.0      total = 0
     5    100001     400327.0      4.0     49.2      for i in range(100000):
     6    100000     412711.0      4.1     50.8          total += i
     7         1          8.0      8.0      0.0      return total



In [19]:
df = pd.DataFrame({
    'A' : [1, 2, 3, 4, 5],
    'B' : [5, 4, 3, 2, 1],
    'C' : [10, 20, 30, 40, 50]
})

result = df.query('A > 2 and B < 4')
result

Unnamed: 0,A,B,C
2,3,3,30
3,4,2,40
4,5,1,50


In [120]:
df.eval('D = A + B + C', inplace=True)
df

Unnamed: 0,A,B,C,D
0,1,5,10,16
1,2,4,20,26
2,3,3,30,36
3,4,2,40,46
4,5,1,50,56


In [20]:
df.eval('E = (A + B) * (A > 2) + (A - B) * (A <= 2)', inplace=True)
df

Unnamed: 0,A,B,C,E
0,1,5,10,-4
1,2,4,20,-2
2,3,3,30,6
3,4,2,40,6
4,5,1,50,6
