In [None]:
# ## 2. Reading Data from a CSV File
#
# Pandas can read data from various file formats, including CSV.
# The following code reads a CSV file into a DataFrame.
# Uncomment the line below and replace 'your_file.csv' with the actual file path.

# df = pd.read_csv('your_file.csv')

# ## 3. Accessing Data
#
# Columns are picked out by label and rows by integer position.

# One label -> that column as a Series
display(df.loc[:, 'Name'])

# A list of labels -> a DataFrame holding just those columns
display(df.loc[:, ['Name', 'Salary']])

# Integer position 1 -> the second row, returned as a Series
display(df.iloc[1, :])

# ## 4. Filtering Data
#
# A boolean mask keeps only the rows for which the condition holds.

# Keep the rows whose Age exceeds 30
filtered_df = df.loc[df['Age'].gt(30)]
display(filtered_df)

# ## 5. Adding and Modifying Columns
#
# Assigning to a column label overwrites that column if it exists and
# creates it otherwise.

# Raise every salary by 5%.
# NOTE: this reads and rewrites the same column, so running the cell again
# applies the raise a second time.
df['Salary'] = df['Salary'].mul(1.05)

# New column; the list must supply exactly one value per row of df
df['Experience'] = [2, 5, 7, 10]
display(df)

# ## 6. Handling Missing Values
#
# Missing values in a DataFrame can be filled or removed.

# Build a small frame in which every column has exactly one missing entry (None)
data_with_nan = dict(
    Name=['Alice', 'Bob', None, 'David'],
    Age=[25, None, 35, 40],
    Salary=[50000, 60000, 70000, None],
)
df_nan = pd.DataFrame(data=data_with_nan)
display(df_nan)

# Fill each column with its own replacement: a placeholder label for Name,
# the column mean for Age and the column median for Salary.
# fillna returns a new frame; df_nan itself is left unchanged.
df_nan_filled = df_nan.fillna(
    value={
        'Name': 'Unknown',
        'Age': df_nan['Age'].mean(),
        'Salary': df_nan['Salary'].median(),
    }
)
display(df_nan_filled)

# ## 7. Grouping and Aggregation
#
# Pandas allows grouping of data to perform aggregate functions.

# Average the numeric columns within each 'Experience' group. This relies on
# the 'Experience' column that section 5 adds to df.
# numeric_only=True is needed because df also holds the text column 'Name':
# since pandas 2.0, mean() raises a TypeError on non-numeric columns instead
# of silently dropping them.
grouped = df.groupby('Experience').mean(numeric_only=True)
display(grouped)

# ## 8. Exporting Data
#
# A DataFrame can be written out to formats such as CSV or Excel.

# CSV file; index=False leaves the row index out of the output
df.to_csv(path_or_buf='output.csv', index=False)

# Excel workbook, again without the row index
df.to_excel(excel_writer='output.xlsx', index=False)

# ## 9. Summary Statistics
#
# You can quickly get insights into the data using summary functions.

# describe() builds a table of summary statistics for df; display() renders it.
display(df.describe())


# Pandas DataFrames
## 1. Creating a Pandas DataFrame
A DataFrame is a two-dimensional, labeled data structure in Pandas.
It is similar to an Excel spreadsheet or SQL table.

In [35]:
import numpy as np
import pandas as pd

In [95]:
from numpy.random import randn

# Seed NumPy's global random generator so the random values drawn with
# randn below come out the same on every run.
np.random.seed(seed=101)

In [97]:
# Create a 5x4 DataFrame of random draws, with rows labelled A-E and
# columns labelled W-Z.
df_rnd = pd.DataFrame(
    randn(5, 4),
    index=list('ABCDE'),
    columns=list('WXYZ'),
)

In [99]:
# Echo the frame so the notebook renders it as this cell's output
df_rnd

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


## Creating a DataFrame from a dictionary
We create a DataFrame first from a dictionary of scalar (single) values and then from a dictionary of lists.

In [None]:
# Column-oriented input: each key becomes a column label and each list
# supplies that column's values, one entry per row.
data = dict(
    Name=['Alice', 'Bob', 'Charlie', 'David'],
    Age=[25, 30, 35, 40],
    Salary=[50000, 60000, 70000, 80000],
)
df = pd.DataFrame(data=data)


In [72]:
# One record described by scalar values: a single value per field
my_dict = dict(Name="Sam", Age=40, Salary=10000)

In [74]:
# Every value in my_dict is a scalar, so an explicit index is supplied to
# give the frame its single row (labelled 0).
df = pd.DataFrame(my_dict, index=[0])

In [76]:
# Show the single-row frame built from the scalar-valued dictionary
df

Unnamed: 0,Name,Age,Salary
0,Sam,40,10000


In [78]:
# Dictionary of equal-length lists: one list per column, one entry per row
my_dict = dict(
    Name=['Beryl', 'Kath', 'Sam', 'Mandy'],
    Age=[25, 30, 40, 23],
    Salary=[10000, 20000, 35000, 12000],
)

In [80]:
# The lists in my_dict supply the rows, so no explicit index is passed here
df = pd.DataFrame(my_dict)

In [82]:
# Show the frame built from the dictionary of lists
df

Unnamed: 0,Name,Age,Salary
0,Beryl,25,10000
1,Kath,30,20000
2,Sam,40,35000
3,Mandy,23,12000


In [84]:
# Render the DataFrame with an explicit display() call instead of relying on
# the cell's last expression being echoed

display(df)

Unnamed: 0,Name,Age,Salary
0,Beryl,25,10000
1,Kath,30,20000
2,Sam,40,35000
3,Mandy,23,12000


## Selection and Indexing

In [104]:
# Grabbing a column: bracket indexing with a single label returns that column
df_rnd['W']

A    2.706850
B    0.651118
C   -2.018168
D    0.188695
E    0.190794
Name: W, dtype: float64

In [108]:
# A single selected column is a Series, as the output below shows
type(df_rnd['W'])

pandas.core.series.Series

In [110]:
# The whole table, by contrast, is a DataFrame.
# NOTE(review): this inspects `df`, not the `df_rnd` used in the rest of this
# section - presumably `type(df_rnd)` was intended; confirm.
type(df)

pandas.core.frame.DataFrame

In [112]:
# Attribute-style column access; the output matches df_rnd['W'] shown earlier
df_rnd.W

A    2.706850
B    0.651118
C   -2.018168
D    0.188695
E    0.190794
Name: W, dtype: float64