In [None]:
import pandas as pd

In [None]:
# Build a DataFrame from column-oriented data:
# every key becomes a column name and every list holds that column's values.
data = dict(
    Name=['Alice', 'Bob', 'Charlie', 'David', 'Eva'],
    Age=[25, 30, 35, 40, 45],
    City=['New York', 'Los Angeles', 'Chicago', 'Houston', 'Miami'],
)
df = pd.DataFrame(data)

# Show the whole (small) frame as plain text
print(df)

      Name  Age         City
0    Alice   25     New York
1      Bob   30  Los Angeles
2  Charlie   35      Chicago
3    David   40      Houston
4      Eva   45        Miami


In [None]:
# Accessing columns
# Selecting a single column by its name yields a Series
name_column = df['Name']
age_column = df['Age']

print(name_column)
print("\n")  # blank gap between the two printouts
print(age_column)

0      Alice
1        Bob
2    Charlie
3      David
4        Eva
Name: Name, dtype: object


0    25
1    30
2    35
3    40
4    45
Name: Age, dtype: int64


In [None]:
# Accessing rows
# .loc looks a row up by its index label, .iloc by its integer position
first_row = df.loc[0]    # row whose label is 0
third_row = df.iloc[2]   # row at position 2 (the third one)

print(first_row)
print("\n")  # blank gap between the two printouts
print(third_row)

Name       Alice
Age           25
City    New York
Name: 0, dtype: object


Name    Charlie
Age          35
City    Chicago
Name: 2, dtype: object


In [None]:
# Slicing rows
# The stop value is exclusive: this selects rows 1, 2 and 3
middle_rows = slice(1, 4)
print(df[middle_rows])

      Name  Age         City
1      Bob   30  Los Angeles
2  Charlie   35      Chicago
3    David   40      Houston


In [None]:
# Filtering data
# Build a boolean mask (True where Age is greater than 30),
# then index the frame with it to keep only the matching rows.
is_over_30 = df['Age'] > 30
filtered_df = df[is_over_30]
print(filtered_df)

      Name  Age     City
2  Charlie   35  Chicago
3    David   40  Houston
4      Eva   45    Miami


In [None]:
# Adding a new column
# Assigning a scalar broadcasts it to every row, so there is no value list
# that has to be kept in sync with the number of rows in the frame.
df['Country'] = 'USA'
print(df)

      Name  Age         City Country
0    Alice   25     New York     USA
1      Bob   30  Los Angeles     USA
2  Charlie   35      Chicago     USA
3    David   40      Houston     USA
4      Eva   45        Miami     USA


In [None]:
# Modifying a column
# Increase the Age of everyone by 5 years.
# NOTE: this overwrites 'Age' with a value derived from itself, so every
# re-run of this cell adds another 5 years.
ages_plus_five = df['Age'] + 5
df['Age'] = ages_plus_five
print(df)

      Name  Age         City Country
0    Alice   30     New York     USA
1      Bob   35  Los Angeles     USA
2  Charlie   40      Chicago     USA
3    David   45      Houston     USA
4      Eva   50        Miami     USA


In [None]:
# Dropping a column
# Rebind `df` to the frame returned by drop() instead of mutating in place.
# errors='ignore' keeps the cell re-runnable: a second run, when 'Country'
# is already gone, is a no-op rather than a KeyError.
df = df.drop(columns='Country', errors='ignore')
print(df)

      Name  Age         City
0    Alice   30     New York
1      Bob   35  Los Angeles
2  Charlie   40      Chicago
3    David   45      Houston
4      Eva   50        Miami


In [None]:
# Summary statistics
# describe() reports count, mean, std, min, quartiles and max;
# here only the numeric 'Age' column appears in the result.
summary = df.describe()
print(summary)

             Age
count   5.000000
mean   40.000000
std     7.905694
min    30.000000
25%    35.000000
50%    40.000000
75%    45.000000
max    50.000000


In [None]:
# Saving to a CSV file
# index=False keeps the row labels out of the written file
csv_path = 'sample_data.csv'
df.to_csv(csv_path, index=False)

In [None]:
# Reading from a CSV file
# Load the file written by the previous cell back into a new DataFrame
source_csv = 'sample_data.csv'
loaded_df = pd.read_csv(source_csv)
print(loaded_df)

      Name  Age         City
0    Alice   30     New York
1      Bob   35  Los Angeles
2  Charlie   40      Chicago
3    David   45      Houston
4      Eva   50        Miami


In [None]:
# Creating a Series from a list
# No index is passed, so the values are labelled 0, 1, 2, ... automatically
data = [10, 20, 30, 40, 50]
s = pd.Series(data=data)
print(s)

0    10
1    20
2    30
3    40
4    50
dtype: int64


In [None]:
# Accessing elements by position
# .iloc is explicitly positional, so "first" and "third" hold whatever the
# index labels are. Plain s[0] is a label lookup when the index holds
# integers, and falling back to positions on other indexes is deprecated in
# recent pandas releases -- which matters here because `s` is later rebound
# to a Series with string labels.
print(s.iloc[0])  # Access the first element
print(s.iloc[2])  # Access the third element

10
30


In [None]:
# Setting custom index labels
# One label per value in `data`; passing them as `index` replaces the default
# integer labels. `s` is rebound here, so later cells see the labelled Series.
custom_index = list('ABCDE')
s = pd.Series(data=data, index=custom_index)
print(s)

A    10
B    20
C    30
D    40
E    50
dtype: int64


In [None]:
# Accessing elements by custom index
# Look up the values stored under labels 'A' and 'C', one per line
for label in ('A', 'C'):
    print(s[label])

10
30


In [None]:
# Label-based lookup: the value stored under label 'B'
value_at_b = s.loc['B']
print(value_at_b)

20


In [None]:
# Position-based lookup: the value at integer position 4 (the fifth element)
fifth_value = s.iloc[4]
print(fifth_value)

50


In [None]:
# Filtering data
# Comparing the Series with a scalar gives a boolean Series;
# indexing with that mask keeps only the elements greater than 30.
above_30 = s > 30
filtered_s = s[above_30]
print(filtered_s)

D    40
E    50
dtype: int64


In [None]:
# Arithmetic operations
# Two Series whose labels only partly overlap:
# 'C' appears only in s1 and 'D' only in s2.
s1 = pd.Series({'A': 1, 'B': 2, 'C': 3})
s2 = pd.Series({'A': 10, 'B': 20, 'D': 30})

print(s1)
print(s2)

A    1
B    2
C    3
dtype: int64
A    10
B    20
D    30
dtype: int64


In [None]:
# Adding Series (alignment based on index)
# Values are paired up by label, not by position. Labels found in both
# operands ('A', 'B') are summed; labels found in only one ('C', 'D') have
# no partner and come out as NaN, which is why the result is float64.
result = s1 + s2
print(result)

A    11.0
B    22.0
C     NaN
D     NaN
dtype: float64


In [None]:
# Handling missing data (NaN)
# Set up two Series for the example: s3 has labels 'A', 'B' and 'C',
# while s4 has no entry for 'C'.
s3 = pd.Series(data=[1, 2, 3], index=list('ABC'))
s4 = pd.Series(data=[10, 20], index=list('AB'))

print(s3)
print(s4)

A    1
B    2
C    3
dtype: int64
A    10
B    20
dtype: int64
