In [2]:
pip install -U pandas

Note: you may need to restart the kernel to use updated packages.


# Creating a DataFrame

In [4]:
import pandas as pd

# Left-hand frame: people and their ages.
df1 = pd.DataFrame(
    {
        'name': ['Alice', 'Bob'],
        'age': [25, 30],
    }
)

# Right-hand frame: people and their occupations. Note that none of these
# names appear in df1, so an inner join on `name` matches no rows.
df2 = pd.DataFrame(
    {
        'name': ['Carol', 'Dave'],
        'occupation': ['Software Engineer', 'Data Scientist'],
    }
)

# Plain inner join on the shared `name` column.
merged_df1 = df1.merge(df2, on='name', how='inner')

# Same join, but indicator=True appends a `_merge` column that records
# which side(s) each result row came from.
merged_df2 = df1.merge(df2, on='name', how='inner', indicator=True)

# Show both results, one after the other.
print(merged_df1, merged_df2, sep='\n')


Empty DataFrame
Columns: [name, age, occupation]
Index: []
Empty DataFrame
Columns: [name, age, occupation, _merge]
Index: []


In [5]:
import pandas as pd

# Build a frame whose last row is entirely missing. With default dtype
# inference the None in the numeric `age` column is stored as NaN, which
# makes that column floating point (see the 25.0 / 30.0 in the output).
df = pd.DataFrame(
    {
        'name': ['Alice', 'Bob', None],
        'age': [25, 30, None],
    }
)

# Show the resulting frame.
print(df)


    name   age
0  Alice  25.0
1    Bob  30.0
2   None   NaN


# copy-on-write optimization:

In [6]:
import pandas as pd

# Start from a two-row frame with one string column and one integer column.
df = pd.DataFrame(
    {
        'name': ['Alice', 'Bob'],
        'age': [25, 30],
    }
)

# Per-column byte counts before the change. memory_usage() is called with its
# defaults, i.e. a shallow measurement (deep=False).
print(df.memory_usage())

# Append an `occupation` column in place, then report the byte counts again
# so the two printouts can be compared side by side.
df['occupation'] = ['Software Engineer', 'Data Scientist']
print(df.memory_usage())


Index    132
name      16
age       16
dtype: int64
Index         132
name           16
age            16
occupation     16
dtype: int64


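As you can see, adding a column increases the reported memory usage only by the new column's own entry (`occupation`, 16 bytes here, because `memory_usage()` is shallow by default and counts just the two 8-byte object pointers); the figures for `Index`, `name` and `age` are unchanged. Note that this example does not actually exercise copy-on-write. In Pandas 2.0, copy-on-write is an opt-in mode (`pd.set_option("mode.copy_on_write", True)`) that defers copying the underlying data when one DataFrame or Series is derived from another, until one of them is modified.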

In [9]:
import pandas as pd

# Frame with a fully missing last row; dropna() with no arguments removes
# every row that has a missing value in any column.
df1 = pd.DataFrame(
    {
        'name': ['Alice', 'Bob', None],
        'age': [25, 30, None],
    }
).dropna()

# Frame without missing values; dropna(subset=['age']) only inspects the
# `age` column when deciding which rows to drop, so nothing is removed here.
df2 = pd.DataFrame(
    {
        'name': ['Carol', 'Dave'],
        'age': [25, 30],
    }
).dropna(subset=['age'])

# Show both results, one after the other.
print(df1, df2, sep='\n')


    name   age
0  Alice  25.0
1    Bob  30.0
    name  age
0  Carol   25
1   Dave   30


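As you can see, the two calls behave differently. `dropna()` with no arguments drops every row that contains a missing value in any column, which removes the last row of `df1`. `dropna(subset=['age'])` only looks at the listed columns when deciding which rows to drop; `df2` has no missing ages, so it is returned unchanged. Note that the `subset` parameter is not new in Pandas 2.0: it is also available in earlier versions of pandas.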