In [8]:
import pandas as pd
import numpy as np  

# 1. Create DataFrame from List
data = [["Sush", 20, "Switzerland"], ["Bobby", 21, "London"], ["Deepu", 20, "Paris"]]
a = pd.DataFrame(data, columns=["name", "age", "city"])
print(" DataFrame from List: ")
print(a)


#2 
filtered_a = a[a['age'] > 28]
print("\n Filtered DataFrame (age > 28):")
print(filtered_a)

# 5. Sort DataFrame (by age, descending)
sorted_a = a.sort_values(by='age', ascending=False)
print("\n DataFrame Sorted by 'age' (descending):")
print(sorted_a)

# 6. Summary Statistics (describe)
print("\n Descriptive Statistics (mean, std, etc.):")
print(a.describe())  # Assuming numerical columns

# 7. Selecting Rows & Columns (iloc, loc)
specific_row = a.iloc[0]  # 1st row using integer indexing
specific_col = a.loc[:, 'city']  # 'city' column using label indexing
print("\n Selected Row (iloc[0]) & Column ('city', loc):")
print(specific_row)
print(specific_col)

# 8. Handling Missing Values (fillna)
a_with_missing = pd.DataFrame({'col1': [1, None, 3], 'col2': ['a', 'b', np.nan]})
print("\n DataFrame with Missing Values:")
print(a_with_missing)

a_with_missing.fillna(0, inplace=True)  # Fill missing values with 0
print("\n DataFrame with Missing Values Filled (0):")
print(a_with_missing)

# 9. Apply Function to Each Value (apply)
def discount_age(age):
  if age > 30:
    return age * 0.9  # Apply 10% discount for age > 30
  else:
    return age

a['discounted_age'] = a['age'].apply(discount_age)
print("\n Apply Function to 'age' (discount for >30):")
print(a)

# 10. Concatenate DataFrames (concat)
a2 = pd.DataFrame({'name': ['David', 'Emily'], 'age': [27, 25]})
concat_a = pd.concat([a, a2], ignore_index=True)  # Concatenate vertically, reset index
print("\n Concatenated DataFrames (ignore_index=True):")
print(concat_a)

# 11. Group Data (groupby, mean)
average_age_by_city = a.groupby('city')['age'].mean()
print("\n Average Age Grouped by 'city':")
print(average_age_by_city)

# 12. Get Unique Values (unique)
unique_names = a['name'].unique()
print("\n Unique Names in 'name' Column:")
print(unique_names)

# 13. Count Occurrences (value_counts)
name_counts = a['name'].value_counts()
print("\n Name Occurrences (value_counts):")
print(name_counts)

# 14. Rename Columns (rename)
a.rename(columns={'age': 'years_old'}, inplace=True)
print("\n DataFrame with Renamed Column ('age' to 'years_old'):")
print(a)

# 15. Drop Rows/Columns (drop)
dropped_a = a.drop('occupation', axis=1)  # Drop 'occupation' column (axis=1)
print("\n DataFrame with 'occupation' Column Dropped:")
print(dropped_a)

# 16. Iterate Through Rows (iterrows)
for index, row in a.iterrows():
  print(f"\n Row Index {index}: {row.to_list()}")

# 17. Access Values by Label (at)
print("\n Access Value at 'name' for 2nd Row (index 1):")
print(a.at[1, 'name'])

# 18. Select Rows Based on Multiple Conditions
filtered_a = a[(a['years_old'] > 28) & (a['city'] == 'New York')]
print("\n Filtered DataFrame (age > 28 & city='New York'):")
print(filtered_a)

# 19. Merge DataFrames (merge)
df_scores = pd.DataFrame({'name': ['Alice', 'Bob', 'Emily'], 'score': [85, 90, 78]})
merged_a = pd.merge(a, df_scores, on='name', how='left')  # Left join on 'name'
print("\n Merged DataFrames (left join on 'name'):")
print(merged_a)

# 20. Create Dummy Variables (get_dummies)
dummy_a = pd.get_dummies(a['city'])  # One-hot encode 'city' column
print("\n DataFrame with Dummy Variables for 'city':")
print(pd.concat([a, dummy_a], axis=1))  # Concatenate with original DataFrame





 DataFrame from List: 
    name  age         city
0   Sush   20  Switzerland
1  Bobby   21       London
2  Deepu   20        Paris

 Filtered DataFrame (age > 28):
Empty DataFrame
Columns: [name, age, city]
Index: []

 DataFrame Sorted by 'age' (descending):
    name  age         city
1  Bobby   21       London
0   Sush   20  Switzerland
2  Deepu   20        Paris

 Descriptive Statistics (mean, std, etc.):
             age
count   3.000000
mean   20.333333
std     0.577350
min    20.000000
25%    20.000000
50%    20.000000
75%    20.500000
max    21.000000

 Selected Row (iloc[0]) & Column ('city', loc):
name           Sush
age              20
city    Switzerland
Name: 0, dtype: object
0    Switzerland
1         London
2          Paris
Name: city, dtype: object

 DataFrame with Missing Values:
   col1 col2
0   1.0    a
1   NaN    b
2   3.0  NaN

 DataFrame with Missing Values Filled (0):
   col1 col2
0   1.0    a
1   0.0    b
2   3.0    0

 Apply Function to 'age' (discount for >30):


KeyError: "['occupation'] not found in axis"

In [6]:

print("\n Access Row (index 1) & Column ('name'):")
print(a.iloc[1])
print(a['name'])


 Access Row (index 1) & Column ('name'):
name                 Bob
years_old             30
city              London
occupation        Doctor
discounted_age        30
Name: 1, dtype: object
0      Alice
1        Bob
2    Charlie
Name: name, dtype: object


In [5]:
a['occupation'] = ['Teacher', 'Doctor', 'Engineer']
print("\n DataFrame with New Column ('occupation'):")
print(a)


 DataFrame with New Column ('occupation'):
      name  years_old      city occupation  discounted_age
0    Alice         25  New York    Teacher              25
1      Bob         30    London     Doctor              30
2  Charlie         28     Paris   Engineer              28
