# Steps to optimize Pandas Code

In [2]:
import pandas as pd 
# Running example: a single integer column holding the values 1 through 5.
data = {'old_column': list(range(1, 6))}
df = pd.DataFrame(data=data)
df

Unnamed: 0,old_column
0,1
1,2
2,3
3,4
4,5


In [8]:
# Row-by-row version (the slow baseline): visit every index label and write
# the doubled value into 'new_column' one cell at a time.
for index in df.index:
    doubled = df.at[index, 'old_column'] * 2
    # The first write creates 'new_column'; rows not yet visited hold NaN
    # until their own iteration, which is why the column ends up float-typed.
    df.at[index, 'new_column'] = doubled
df

Unnamed: 0,old_column,new_column
0,1,2.0
1,2,4.0
2,3,6.0
3,4,8.0
4,5,10.0


In [9]:
# Start again from a fresh copy of the example frame.
data = {'old_column': [1, 2, 3, 4, 5]}
df = pd.DataFrame(data)

# Vectorized version: double the whole column in a single operation
# instead of looping over the rows.
df['new_column'] = df['old_column'].mul(2)
print("\nDataFrame after using vectorized operations:")
df


DataFrame after using vectorized operations:


Unnamed: 0,old_column,new_column
0,1,2
1,2,4
2,3,6
3,4,8
4,5,10


In [4]:
# Build a small float-valued frame to measure before any dtype optimization.
data = {'column': [1.0, 2.5, 3.8, 4.2, 5.6]}
df = pd.DataFrame(data)

# Original DataFrame
print("Original DataFrame:")
# DataFrame.info() writes its report straight to stdout and returns None,
# so it is called directly; wrapping it in print() appends a stray "None".
df.info()

# Check memory usage (bytes per column, plus the index)
print("\nMemory usage before optimization:")
print(df.memory_usage())
df

Original DataFrame:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   column  5 non-null      float64
dtypes: float64(1)
memory usage: 172.0 bytes
None

Memory usage before optimization:
Index     132
column     40
dtype: int64


Unnamed: 0,column
0,1.0
1,2.5
2,3.8
3,4.2
4,5.6


In [15]:
# Optimize memory usage: store the column as 32-bit floats
df = df.astype({'column': 'float32'})
# Show the frame after the conversion
print("\nDataFrame after optimizing memory usage:")
df


DataFrame after optimizing memory usage:


Unnamed: 0,column
0,1.0
1,2.5
2,3.8
3,4.2
4,5.6


In [16]:
# DataFrame.info() prints its own summary and returns None; call it directly
# rather than through print(), which would add a trailing "None" line.
df.info()
# Check memory usage after optimization
print("\nMemory usage after optimization:")
print(df.memory_usage())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   column  5 non-null      float32
dtypes: float32(1)
memory usage: 152.0 bytes
None

Memory usage after optimization:
Index     132
column     20
dtype: int64


In [23]:
# Sample records: a numeric column with one missing entry and a label column.
data = {
    'column1': [1, 2, None, 4, 5],
    'column2': ['A', 'B', 'C', 'D', 'E'],
}

# Step-by-step clean-up: each stage is stored under its own name.
raw = pd.DataFrame(data)
complete_rows = raw.dropna(subset=['column1'])  # discard rows missing column1
relabelled = complete_rows.rename(columns={'column2': 'new_column'})
df = relabelled.reset_index(drop=True)  # renumber the remaining rows from 0
df

Unnamed: 0,column1,new_column
0,1.0,A
1,2.0,B
2,4.0,D
3,5.0,E


In [26]:
# With method chaining: build, filter, rename and re-index in one expression,
# one step per line.
df = (
    pd.DataFrame(data)
    .dropna(subset=['column1'])  # discard rows missing column1
    .rename(columns={'column2': 'new_column'})
    .reset_index(drop=True)  # renumber the remaining rows from 0
)
df

Unnamed: 0,column1,new_column
0,1.0,A
1,2.0,B
2,4.0,D
3,5.0,E
