In [2]:
import pandas as pd

In [3]:
# Create a Pandas Series
data = [10, 20, 30, 40, 50]
series = pd.Series(data)

# Print the Series
print(series)

0    10
1    20
2    30
3    40
4    50
dtype: int64


In [4]:
# Create a DataFrame using a dictionary
data = {
    'Name': ['John', 'Anna', 'Peter', 'Linda'],
    'Age': [28, 24, 35, 32],
    'City': ['New York', 'Paris', 'London', 'Berlin']
}

df = pd.DataFrame(data)

# Print the DataFrame
print(df)

    Name  Age      City
0   John   28  New York
1   Anna   24     Paris
2  Peter   35    London
3  Linda   32    Berlin


In [5]:
# Access the 'Name' column
print(df['Name'])

0     John
1     Anna
2    Peter
3    Linda
Name: Name, dtype: object


In [6]:
# Access 'Name' and 'Age' columns
print(df[['Name', 'Age']])

    Name  Age
0   John   28
1   Anna   24
2  Peter   35
3  Linda   32


In [7]:
# Access the first row
print(df.iloc[0])

Name        John
Age           28
City    New York
Name: 0, dtype: object


In [8]:
# Add a new column 'Salary'
df['Salary'] = [50000, 55000, 60000, 65000]
print(df)


    Name  Age      City  Salary
0   John   28  New York   50000
1   Anna   24     Paris   55000
2  Peter   35    London   60000
3  Linda   32    Berlin   65000


In [9]:
# Increase all salaries by 10%
df['Salary'] = df['Salary'] * 1.10
print(df)


    Name  Age      City   Salary
0   John   28  New York  55000.0
1   Anna   24     Paris  60500.0
2  Peter   35    London  66000.0
3  Linda   32    Berlin  71500.0


In [10]:
# Drop the 'City' column
df = df.drop(columns=['City'])
print(df)


    Name  Age   Salary
0   John   28  55000.0
1   Anna   24  60500.0
2  Peter   35  66000.0
3  Linda   32  71500.0


In [11]:
# Get rows where Age is greater than 30
filtered_df = df[df['Age'] > 30]
print(filtered_df)


    Name  Age   Salary
2  Peter   35  66000.0
3  Linda   32  71500.0


In [12]:
# Get rows where Age is greater than 30 and Salary is less than 60000
filtered_df = df[(df['Age'] > 30) & (df['Salary'] < 60000)]
print(filtered_df)

Empty DataFrame
Columns: [Name, Age, Salary]
Index: []


In [13]:
# Create a DataFrame with missing data
df_missing = pd.DataFrame({
    'Name': ['John', 'Anna', 'Peter', None],
    'Age': [28, None, 35, 32],
    'City': ['New York', 'Paris', 'Berlin', 'London']
})

# Check for missing values
print(df_missing.isnull())

    Name    Age   City
0  False  False  False
1  False   True  False
2  False  False  False
3   True  False  False


In [14]:
# Fill missing values in 'Age' column with the mean of the column
df_missing['Age'].fillna(df_missing['Age'].mean(), inplace=True)

print(df_missing)

    Name        Age      City
0   John  28.000000  New York
1   Anna  31.666667     Paris
2  Peter  35.000000    Berlin
3   None  32.000000    London


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_missing['Age'].fillna(df_missing['Age'].mean(), inplace=True)


In [15]:
# Save the DataFrame to a CSV file
df.to_csv('output.csv', index=False)

In [16]:
# Read data from a CSV file
df_new = pd.read_csv('output.csv')

print(df_new)

    Name  Age   Salary
0   John   28  55000.0
1   Anna   24  60500.0
2  Peter   35  66000.0
3  Linda   32  71500.0


In [20]:
# Group by 'City' and calculate the mean of 'Age'
grouped_df = df.groupby('Name')['Age'].mean()

print(grouped_df)

Name
Anna     24.0
John     28.0
Linda    32.0
Peter    35.0
Name: Age, dtype: float64


In [21]:
# Create two DataFrames
df1 = pd.DataFrame({
    'EmployeeID': [1, 2, 3, 4],
    'Name': ['John', 'Anna', 'Peter', 'Linda'],
})

df2 = pd.DataFrame({
    'EmployeeID': [1, 2, 3, 5],
    'Department': ['HR', 'Finance', 'IT', 'Marketing'],
})

# Merge the two DataFrames on 'EmployeeID'
merged_df = pd.merge(df1, df2, on='EmployeeID', how='inner')

print(merged_df)

   EmployeeID   Name Department
0           1   John         HR
1           2   Anna    Finance
2           3  Peter         IT


In [22]:
# Left Join
left_joined = pd.merge(df1, df2, on='EmployeeID', how='left')
print(left_joined)

   EmployeeID   Name Department
0           1   John         HR
1           2   Anna    Finance
2           3  Peter         IT
3           4  Linda        NaN
