In [1]:
import pandas as pd

# Creating a DataFrame from a dictionary
data = {'Name': ['Alice', 'Bob', 'Charlie'],
        'Age': [25, 30, 35],
        'City': ['New York', 'Los Angeles', 'Chicago']}
df = pd.DataFrame(data)

# Creating a Series from a list
ser = pd.Series([1, 2, 3, 4, 5])

print(df)
print(ser)


      Name  Age         City
0    Alice   25     New York
1      Bob   30  Los Angeles
2  Charlie   35      Chicago
0    1
1    2
2    3
3    4
4    5
dtype: int64


In [2]:
print(df.head())
print(df.tail())
print(df.info())
print(df.describe())


      Name  Age         City
0    Alice   25     New York
1      Bob   30  Los Angeles
2  Charlie   35      Chicago
      Name  Age         City
0    Alice   25     New York
1      Bob   30  Los Angeles
2  Charlie   35      Chicago
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Name    3 non-null      object
 1   Age     3 non-null      int64 
 2   City    3 non-null      object
dtypes: int64(1), object(2)
memory usage: 204.0+ bytes
None
        Age
count   3.0
mean   30.0
std     5.0
min    25.0
25%    27.5
50%    30.0
75%    32.5
max    35.0


In [3]:
# Selecting a single column
print(df['Name'])

# Selecting multiple columns
print(df[['Name', 'City']])

# Selecting rows by position
print(df.iloc[0])

# Selecting rows by label
print(df.loc[0])


0      Alice
1        Bob
2    Charlie
Name: Name, dtype: object
      Name         City
0    Alice     New York
1      Bob  Los Angeles
2  Charlie      Chicago
Name       Alice
Age           25
City    New York
Name: 0, dtype: object
Name       Alice
Age           25
City    New York
Name: 0, dtype: object


In [4]:
# Filter rows where age is greater than 30
filtered_df = df[df['Age'] > 30]
print(filtered_df)


      Name  Age     City
2  Charlie   35  Chicago


In [5]:
# Adding a new column
df['Salary'] = [50000, 60000, 70000]
print(df)


      Name  Age         City  Salary
0    Alice   25     New York   50000
1      Bob   30  Los Angeles   60000
2  Charlie   35      Chicago   70000


In [6]:
# Creating a DataFrame with missing values
data_with_nan = {'Name': ['Alice', 'Bob', None],
                 'Age': [25, None, 35],
                 'City': ['New York', 'Los Angeles', None]}
df_nan = pd.DataFrame(data_with_nan)

# Detecting missing values
print(df_nan.isnull())

# Dropping rows with missing values
print(df_nan.dropna())

# Filling missing values
print(df_nan.fillna('Unknown'))


    Name    Age   City
0  False  False  False
1  False   True  False
2   True  False   True
    Name   Age      City
0  Alice  25.0  New York
      Name      Age         City
0    Alice     25.0     New York
1      Bob  Unknown  Los Angeles
2  Unknown     35.0      Unknown


In [7]:
# Creating two DataFrames
df1 = pd.DataFrame({'key': ['A', 'B', 'C'], 'value1': [1, 2, 3]})
df2 = pd.DataFrame({'key': ['A', 'B', 'D'], 'value2': [4, 5, 6]})

# Merging DataFrames
merged_df = pd.merge(df1, df2, on='key', how='inner')
print(merged_df)


  key  value1  value2
0   A       1       4
1   B       2       5


In [8]:
# Grouping data by 'City' and calculating the mean age
grouped = df.groupby('City').mean()
print(grouped)

# Aggregating data
aggregated = df.groupby('City').agg({'Age': 'mean', 'Salary': 'sum'})
print(aggregated)


TypeError: agg function failed [how->mean,dtype->object]

In [9]:
# Sorting by age
sorted_df = df.sort_values(by='Age')
print(sorted_df)


      Name  Age         City  Salary
0    Alice   25     New York   50000
1      Bob   30  Los Angeles   60000
2  Charlie   35      Chicago   70000


In [10]:
# Creating a pivot table
pivot_table = df.pivot_table(values='Salary', index='City', columns='Name', aggfunc='mean')
print(pivot_table)

# Melting a DataFrame
melted_df = pd.melt(df, id_vars=['Name'], value_vars=['Age', 'City'])
print(melted_df)


Name           Alice      Bob  Charlie
City                                  
Chicago          NaN      NaN  70000.0
Los Angeles      NaN  60000.0      NaN
New York     50000.0      NaN      NaN
      Name variable        value
0    Alice      Age           25
1      Bob      Age           30
2  Charlie      Age           35
3    Alice     City     New York
4      Bob     City  Los Angeles
5  Charlie     City      Chicago
