# Pandas Data Structures



## 1. Series

### Creating Series

In [2]:
import pandas as pd
import numpy as np

# From list
s1 = pd.Series([10, 20, 30, 40])

# With custom index
s2 = pd.Series([100, 200, 300], index=['a', 'b', 'c'])

# From dictionary
s3 = pd.Series({'x': 1, 'y': 2, 'z': 3})

print(s1)
print(s2)
print(s3)

0    10
1    20
2    30
3    40
dtype: int64
a    100
b    200
c    300
dtype: int64
x    1
y    2
z    3
dtype: int64


### Indexing Series

In [3]:
# Position-based indexing
print(s2[0])
print(s2[1:3])

# Label-based indexing
print(s2['a'])
print(s2[['a', 'c']])

100
b    200
c    300
dtype: int64
100
a    100
c    300
dtype: int64


  print(s2[0])


### Series Attributes

In [4]:
print('values:', s2.values)
print('index:', s2.index)
print('dtype:', s2.dtype)
print('size:', s2.size)
print('name:', s2.name)

values: [100 200 300]
index: Index(['a', 'b', 'c'], dtype='object')
dtype: int64
size: 3
name: None


### Vectorized Operations on Series

In [5]:
prices = pd.Series([100, 120, 90, 110])

# Vectorized math
print(prices * 1.1)
print(prices + 5)

# Boolean filtering
print(prices[prices > 100])

0    110.0
1    132.0
2     99.0
3    121.0
dtype: float64
0    105
1    125
2     95
3    115
dtype: int64
1    120
3    110
dtype: int64


### Handling Missing Values in Series

In [6]:
s_missing = pd.Series([1, np.nan, 3, None, 5])

print('original:\n', s_missing)
print('isna:\n', s_missing.isna())
print('filled:\n', s_missing.fillna(0))
print('dropped:\n', s_missing.dropna())

original:
 0    1.0
1    NaN
2    3.0
3    NaN
4    5.0
dtype: float64
isna:
 0    False
1     True
2    False
3     True
4    False
dtype: bool
filled:
 0    1.0
1    0.0
2    3.0
3    0.0
4    5.0
dtype: float64
dropped:
 0    1.0
2    3.0
4    5.0
dtype: float64


## 2. DataFrame

### Creating DataFrames

In [7]:
# From dictionary of lists
df1 = pd.DataFrame({
    'name': ['Alice', 'Bob', 'Charlie'],
    'age': [25, 30, 35],
    'salary': [50000, 60000, 70000]
})

print(df1)

      name  age  salary
0    Alice   25   50000
1      Bob   30   60000
2  Charlie   35   70000


In [8]:
# From list of dictionaries
data = [
    {'name': 'Alice', 'age': 25},
    {'name': 'Bob', 'age': 30},
    {'name': 'Charlie', 'age': 35}
]

df2 = pd.DataFrame(data)
print(df2)

      name  age
0    Alice   25
1      Bob   30
2  Charlie   35


In [9]:
# From NumPy array
arr = np.array([[1, 2], [3, 4], [5, 6]])
df3 = pd.DataFrame(arr, columns=['A', 'B'])
print(df3)

   A  B
0  1  2
1  3  4
2  5  6


### Creating DataFrames from Files

In [10]:
# Example (file not loaded here)
# df_csv = pd.read_csv('data.csv')
# df_excel = pd.read_excel('data.xlsx')

print("Use pd.read_csv(), pd.read_excel(), pd.read_json() for file-based loading")

Use pd.read_csv(), pd.read_excel(), pd.read_json() for file-based loading


### DataFrame Shape, Size, and Memory

In [11]:
print('shape:', df1.shape)
print('size:', df1.size)
print('memory usage:\n', df1.memory_usage(deep=True))
print('info():')
df1.info()

shape: (3, 3)
size: 9
memory usage:
 Index     132
name      162
age        24
salary     24
dtype: int64
info():
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   name    3 non-null      object
 1   age     3 non-null      int64 
 2   salary  3 non-null      int64 
dtypes: int64(2), object(1)
memory usage: 204.0+ bytes


### Columns vs Index

In [12]:
print('columns:', df1.columns)
print('index:', df1.index)

# Column access
print(df1['age'])

# Index-based row access
print(df1.loc[0])
print(df1.iloc[1])

columns: Index(['name', 'age', 'salary'], dtype='object')
index: RangeIndex(start=0, stop=3, step=1)
0    25
1    30
2    35
Name: age, dtype: int64
name      Alice
age          25
salary    50000
Name: 0, dtype: object
name        Bob
age          30
salary    60000
Name: 1, dtype: object
