In [1]:
import pandas as pd

## Q1. Create a DataFrame from a dictionary of lists.

In [3]:
# Build a two-column frame from a dict mapping column label -> list of values.
# Every key becomes a column; the lists supply the row values in order.
column_values = {"X": [1, 2, 3, 4], "Y": [5, 6, 7, 8]}
df = pd.DataFrame.from_dict(column_values)
df

Unnamed: 0,X,Y
0,1,5
1,2,6
2,3,7
3,4,8


## Q2. Select the first 3 rows of a DataFrame.

In [4]:
# head(n) returns the first n rows; the source frame itself is left unchanged.
df = pd.DataFrame({"X": [1, 2, 3, 4], "Y": [5, 6, 7, 8]})
first_three = df.head(n=3)
first_three

Unnamed: 0,X,Y
0,1,5
1,2,6
2,3,7


## Q3. Select the 'X' column from a DataFrame.

In [5]:
# Selecting a single column label yields a Series named after that column.
df = pd.DataFrame({"X": [1, 2, 3, 4], "Y": [5, 6, 7, 8]})
x_column = df.loc[:, "X"]
x_column

0    1
1    2
2    3
3    4
Name: X, dtype: int64

## Q4. Filter rows based on a column condition.

In [6]:
# Boolean-mask filtering: keep only the rows whose X value is greater than 2.
df = pd.DataFrame({"X": [1, 2, 3, 4], "Y": [5, 6, 7, 8]})
keep_row = df["X"].gt(2)
filtered_df = df.loc[keep_row]
filtered_df

Unnamed: 0,X,Y
2,3,7
3,4,8


## Q5. Add a new column to an existing DataFrame.

In [7]:
# Derive a new column Z as the element-wise sum of X and Y.
base = pd.DataFrame({"X": [1, 2, 3, 4], "Y": [5, 6, 7, 8]})
df = base.assign(Z=base["X"] + base["Y"])
df

Unnamed: 0,X,Y,Z
0,1,5,6
1,2,6,8
2,3,7,10
3,4,8,12


## Q6. Remove a column from a DataFrame.

In [10]:
# Remove column Z. drop() returns a new frame, so the result is assigned
# rather than mutating with inplace=True; this keeps the cell chainable and
# makes it obvious which object holds the trimmed data.
data = {"X" : [1,2,3,4], "Y" : [5,6,7,8], "Z" : [9,10,11,12]}
df = pd.DataFrame(data).drop(columns=["Z"])
df

Unnamed: 0,X,Y
0,1,5
1,2,6
2,3,7
3,4,8


## Q7. Sort a DataFrame by a column.

In [12]:
# Sort rows by column X in ascending order. sort_values() returns a new,
# sorted frame (original index labels are kept, so they appear as 3, 2, 1, 0);
# assigning the result avoids the inplace=True mutation.
data = {"X" : [4,3,2,1], "Y" : [9,8,7,6]}
df = pd.DataFrame(data).sort_values(by="X")
df

Unnamed: 0,X,Y
3,1,6
2,2,7
1,3,8
0,4,9


## Q8. Group a DataFrame by a column and calculate the mean of each group.

In [15]:
# Group rows by the value in X, then average the remaining column (Y)
# within each group. The group keys become the index of the result.
df = pd.DataFrame({'X': [1, 2, 1, 2], 'Y': [5, 6, 7, 8]})
rows_by_x = df.groupby('X')
grouped_df = rows_by_x.mean()
print(grouped_df)

     Y
X     
1  6.0
2  7.0


## Q9. Replace missing values in a DataFrame.

In [16]:
# Replace every missing value (None becomes NaN in a numeric column) with 0.
# fillna() returns a new frame, so the result is assigned instead of using
# inplace=True. Columns stay float64 because they held NaN when created.
data = {"X" : [1,2,None,4], "Y" : [5,None,7,8]}
df = pd.DataFrame(data).fillna(0)
df

Unnamed: 0,X,Y
0,1.0,5.0
1,2.0,0.0
2,0.0,7.0
3,4.0,8.0


## Q10. Convert a column to datetime.

In [19]:
# Parse the ISO-formatted date strings in column X into a datetime column.
date_strings = {"X": ['2020-01-01', '2020-01-02', '2020-01-03']}
df = pd.DataFrame(date_strings).assign(
    X=lambda frame: pd.to_datetime(frame["X"])
)
df

Unnamed: 0,X
0,2020-01-01
1,2020-01-02
2,2020-01-03


## Q11. Create a DataFrame with specific column names.

In [20]:
# The dict keys supply the column labels; passing columns= as well states the
# wanted labels (and their order) explicitly. Here it selects both keys in
# their original order, so the frame is the same as building from the dict alone.
column_names = ['col1', 'col2']
data = {'col1': [1, 2, 3], 'col2': [4, 5, 6]}
df = pd.DataFrame(data, columns=column_names)
print(df)

   col1  col2
0     1     4
1     2     5
2     3     6


## Q12. Calculate the sum of values in each column.

In [21]:
# sum() with its default axis adds the values down each column, giving one
# total per column label.
df = pd.DataFrame({"X": [1, 2, 3], "Y": [4, 5, 6]})
column_totals = df.sum()
print(column_totals)

X     6
Y    15
dtype: int64


## Q13. Calculate the mean of values in each row.

In [26]:
data = {'X': [1, 2, 3], 'Y': [4, 5, 6]}
df = pd.DataFrame(data)
print(df)
# axis=1 aggregates ACROSS the columns, so the result holds one mean per row
# (indexed 0, 1, 2). For example, row 0 is (1 + 4) / 2 = 2.5.
row_means = df.mean(axis=1)
print(row_means)

   X  Y
0  1  4
1  2  5
2  3  6
0    2.5
1    3.5
2    4.5
dtype: float64


In [None]:
data = {'X': [1, 2, 3], 'Y': [4, 5, 6]}
df = pd.DataFrame(data)
# For contrast with axis=1: axis=0 aggregates DOWN the rows, so the result
# holds one mean per column (indexed by X and Y), not one per row.
column_means = df.mean(axis=0)
print(column_means)

X    2.0
Y    5.0
dtype: float64


## Q14. Concatenate two DataFrames.

In [4]:
# Place two single-column frames side by side. Concatenating along the
# column axis aligns rows on their shared index labels (0, 1, 2).
df1 = pd.DataFrame({"X": [1, 2, 3]})
df2 = pd.DataFrame({"Y": [4, 5, 6]})
frames = [df1, df2]
concatenated_df = pd.concat(frames, axis="columns")
print(concatenated_df)

   X  Y
0  1  4
1  2  5
2  3  6


## Q15. Merge two DataFrames on a key.

In [7]:
# Join two frames on their shared 'key' column. merge() performs an inner
# join by default, so only keys present in BOTH frames (X and Y) survive;
# Z (left only) and D (right only) are dropped.
df1 = pd.DataFrame({'key': ['X', 'Y', 'Z'], 'value1': [1, 2, 3]})
df2 = pd.DataFrame({'key': ['X', 'Y', 'D'], 'value2': [4, 5, 6]})
print(df1)
print(df2)
merged_df = df1.merge(df2, on="key")
print(merged_df)

  key  value1
0   X       1
1   Y       2
2   Z       3
  key  value2
0   X       4
1   Y       5
2   D       6
  key  value1  value2
0   X       1       4
1   Y       2       5


## Q16. Create a pivot table from a DataFrame.

In [None]:
data = {'X': ['foo', 'foo', 'bar', 'bar'], 'Y': ['one', 'two', 'one', 'two'], 'Z': [1, 2, 3, 4]}
df = pd.DataFrame(data)
pivot_table = df.pivot_table(values='Z', index='X', columns='Y')
pivot_table

Y,one,two
X,Unnamed: 1_level_1,Unnamed: 2_level_1
bar,3.0,4.0
foo,1.0,2.0


## Q17. Reshape a DataFrame from long to wide format.

In [10]:
# Reshape long -> wide: each distinct X becomes a row, each distinct Y becomes
# a column, and Z fills the cells. pivot() does no aggregation, so the integer
# values are carried over unchanged.
long_records = {
    'X': ['foo', 'foo', 'bar', 'bar'],
    'Y': ['one', 'two', 'one', 'two'],
    'Z': [1, 2, 3, 4],
}
df = pd.DataFrame(long_records)
wide_df = pd.pivot(df, index='X', columns='Y', values='Z')
print(wide_df)

Y    one  two
X            
bar    3    4
foo    1    2


## Q18. Calculate the correlation between columns in a DataFrame.

In [11]:
# Pairwise correlation between the numeric columns. Y decreases exactly as X
# increases, so the off-diagonal entries come out as -1.0.
df = pd.DataFrame({'X': [1, 2, 3, 4], 'Y': [4, 3, 2, 1]})
correlation = df.corr()
print(correlation)

     X    Y
X  1.0 -1.0
Y -1.0  1.0


## Q19. Iterate over rows in a DataFrame using iterrows().

In [12]:
# iterrows() yields (index label, row-as-Series) pairs, one per row; print the
# label followed by that row's X and Y values.
df = pd.DataFrame({'X': [1, 2, 3], 'Y': [4, 5, 6]})
for label, record in df.iterrows():
    print(label, record['X'], record['Y'])

0 1 4
1 2 5
2 3 6


## Q20. Apply a function to each element in a DataFrame.

In [13]:
def _double(value):
    """Return the given value multiplied by two."""
    return value * 2


# Sample frame to transform.
df = pd.DataFrame({'X': [1, 2, 3], 'Y': [4, 5, 6]})
# apply() hands each column (a Series) to the lambda; Series.map then calls
# _double on every element of that column, so each cell is transformed.
df = df.apply(lambda column: column.map(_double))
print(df)


   X   Y
0  2   8
1  4  10
2  6  12
