In [45]:
import pandas as pd

### Creating a DataFrame

In [46]:
# Column-oriented sample records: each keyword becomes a column and its
# list supplies that column's values, one entry per row.
data = dict(
    Name=['Alice', 'Bob', 'Charlie', 'David', 'Emma'],
    Age=[25, 30, 35, 28, 40],
    Gender=['Female', 'Male', 'Male', 'Male', 'Female'],
    Salary=[50000, 60000, 70000, 55000, 75000],
)

# Build the frame used throughout the rest of the notebook.
df = pd.DataFrame(data)


### Viewing Data

In [47]:
# Plain-text preview of the first rows (5 is head()'s default row count)
print(df.head(n=5))

      Name  Age  Gender  Salary
0    Alice   25  Female   50000
1      Bob   30    Male   60000
2  Charlie   35    Male   70000
3    David   28    Male   55000
4     Emma   40  Female   75000


### Information

In [48]:
# DataFrame.info() writes its report (index range, per-column non-null
# counts, dtypes, memory usage) to stdout itself and returns None, so it is
# called directly; wrapping it in print() would also print a stray "None".
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Name    5 non-null      object
 1   Age     5 non-null      int64 
 2   Gender  5 non-null      object
 3   Salary  5 non-null      int64 
dtypes: int64(2), object(2)
memory usage: 292.0+ bytes
None


### Descriptive Statistics

In [49]:
# Summary statistics (count, mean, std, min, quartiles, max) per column
numeric_summary = df.describe()
print(numeric_summary)


            Age        Salary
count   5.00000      5.000000
mean   31.60000  62000.000000
std     5.94138  10368.220677
min    25.00000  50000.000000
25%    28.00000  55000.000000
50%    30.00000  60000.000000
75%    35.00000  70000.000000
max    40.00000  75000.000000


### Indexing and Slicing

In [50]:
# Select a single column by its label; the result is a Series
name_column = df.loc[:, 'Name']
print(name_column)

# Select a single row by integer position (0 is the first row) via .iloc
first_row = df.iloc[0, :]
print(first_row)


0      Alice
1        Bob
2    Charlie
3      David
4       Emma
Name: Name, dtype: object
Name       Alice
Age           25
Gender    Female
Salary     50000
Name: 0, dtype: object


### Filtering Data

In [51]:
# Boolean mask: True for rows whose Age is strictly greater than 25
is_over_25 = df['Age'] > 25

# Keep only the rows where the mask is True
print(df.loc[is_over_25])


      Name  Age  Gender  Salary
1      Bob   30    Male   60000
2  Charlie   35    Male   70000
3    David   28    Male   55000
4     Emma   40  Female   75000


### Adding a New Column

In [52]:
# One profession per existing row, listed in row order
professions = ['Engineer', 'Doctor', 'Artist', 'Musician', 'Plumber']

# Assigning to a new label appends the column to df
df['Profession'] = professions
print(df)


      Name  Age  Gender  Salary Profession
0    Alice   25  Female   50000   Engineer
1      Bob   30    Male   60000     Doctor
2  Charlie   35    Male   70000     Artist
3    David   28    Male   55000   Musician
4     Emma   40  Female   75000    Plumber


### Handling Missing Data

In [53]:
# Two alternative strategies for missing values. Each call returns a new
# DataFrame and leaves `df` itself untouched, so the cell can be re-run
# safely. Chaining them in place on the same frame would make the fill step
# a no-op, because dropna() has already removed every row containing a
# missing value.

# Option 1: drop every row that contains at least one missing value
df_dropped = df.dropna()

# Option 2: keep every row and replace each missing value with 0
df_filled = df.fillna(0)


### Grouping Data

In [54]:

# Split the rows into one group per distinct Gender value
grouped = df.groupby(by='Gender')

# Average Age and Salary within each gender group
numeric_cols = ['Age', 'Salary']
grouped_mean = grouped[numeric_cols].mean()

# Bare last expression so the notebook displays the result
grouped_mean

Unnamed: 0_level_0,Age,Salary
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Female,32.5,62500.0
Male,31.0,61666.666667


### Sorting Data

In [55]:
# Order rows by Age, largest first; the result is bound to a new name
sorted_df = df.sort_values('Age', ascending=False)
print(sorted_df)


      Name  Age  Gender  Salary Profession
4     Emma   40  Female   75000    Plumber
2  Charlie   35    Male   70000     Artist
1      Bob   30    Male   60000     Doctor
3    David   28    Male   55000   Musician
0    Alice   25  Female   50000   Engineer


### Reading and Writing Data

In [56]:
# Input and output file names, resolved relative to the working directory
csv_in, csv_out = 'data.csv', 'new_data.csv'

# Load the CSV into a DataFrame; this rebinds `df` to the file's contents
# NOTE(review): no cell visible here creates data.csv — confirm it exists
df = pd.read_csv(csv_in)

# Save the frame to a second CSV, omitting the row index
df.to_csv(csv_out, index=False)

# Bare last expression so the notebook displays the loaded frame
df


Unnamed: 0,Name,Age,Gender,Salary
0,Alice,25,Female,50000
1,Bob,30,Male,60000
2,Charlie,35,Male,70000
3,David,28,Male,55000
4,Emma,40,Female,75000
