In [1]:
import pandas as pd

In [2]:
pd.__version__

'2.1.1'

# Series

### A Series holds one-dimensional labelled data; a DataFrame holds two-dimensional (tabular) data
Think of each Series as a single column of a CSV / Excel sheet.

## Way 1: Create Series

In [3]:
# Build a Series from a list of values plus explicit index labels.
# When `index=` is omitted, pandas assigns the default integer labels 0 .. n-1.
A = pd.Series(data=[2, 3, 4, 5], index=["a", "b", "c", "d"])

In [4]:
type(A)

pandas.core.series.Series

## Get Series Values

In [5]:
A.values

array([2, 3, 4, 5])

In [6]:
type(A.values)

numpy.ndarray

## Get Series Index names

In [7]:
A.index

Index(['a', 'b', 'c', 'd'], dtype='object')

In [8]:
type(A.index)

pandas.core.indexes.base.Index

## Get values by index

In [9]:
A["a"]

2

## Way 2: Create a Series from a dictionary

In [10]:
# The dict keys become the index labels and the dict values become the data.
# Integer and float values are mixed here, so the Series ends up as float64.
grades_dict = {
    "A": 4,
    "A-": 3.5,
    "B": 3,
    "B-": 2.5,
}
grades_series = pd.Series(data=grades_dict)

grades_series

A     4.0
A-    3.5
B     3.0
B-    2.5
dtype: float64

In [11]:
grades_series.values

array([4. , 3.5, 3. , 2.5])

In [12]:
grades_series.index

Index(['A', 'A-', 'B', 'B-'], dtype='object')

## Series slicing

In [13]:
A["a": "c"] # label-based slice: both the start label and the end label are inclusive

a    2
b    3
c    4
dtype: int64

In [14]:
A[1:3] # integer slice = positions, end exclusive; A.iloc[1:3] states the same thing explicitly

b    3
c    4
dtype: int64

## Slicing by position when the explicit index labels are integers

In [43]:
# Integer index labels (1, 3, 5) that deliberately differ from the positions (0, 1, 2),
# so label-based and position-based slicing give visibly different results below.
data = pd.Series({1: "a", 3: "b", 5: "c"})

data

1    a
3    b
5    c
dtype: object

In [46]:
data[1:3] # plain [] with an integer slice uses the implicit positions (end exclusive), even though the labels are ints

3    b
5    c
dtype: object

In [48]:
data.iloc[1:3] # .iloc always uses the implicit positions; the end position is exclusive

3    b
5    c
dtype: object

In [49]:
data.loc[1:3] # .loc always uses the explicit labels; the end label is inclusive

1    a
3    b
dtype: object

# Data Frame

### A DataFrame is a two-dimensional labelled table, e.g. the contents of a CSV / Excel sheet

## Create Data Frame from Series objects

In [16]:
# Marks keyed by the same grade labels used for grades_series, so the two
# Series can later be combined into one table. All values are ints -> int64.
marks_dict = {
    "A": 85,
    "A-": 80,
    "B": 75,
    "B-": 70,
}
marks_series = pd.Series(data=marks_dict)

marks_series

A     85
A-    80
B     75
B-    70
dtype: int64

In [17]:
grades_series

A     4.0
A-    3.5
B     3.0
B-    2.5
dtype: float64

In [32]:
# Each dict key becomes a column name; the index labels shared by the two Series
# become the row labels of the frame.
D = pd.DataFrame({"Marks": marks_series, "Grades": grades_series})

In [33]:
D

Unnamed: 0,Marks,Grades
A,85,4.0
A-,80,3.5
B,75,3.0
B-,70,2.5


In [20]:
D.T

Unnamed: 0,A,A-,B,B-
Marks,85.0,80.0,75.0,70.0
Grades,4.0,3.5,3.0,2.5


In [21]:
D.values

array([[85. ,  4. ],
       [80. ,  3.5],
       [75. ,  3. ],
       [70. ,  2.5]])

In [22]:
D.columns

Index(['Marks', 'Grades'], dtype='object')

In [23]:
D.index # Row names

Index(['A', 'A-', 'B', 'B-'], dtype='object')

In [24]:
# Position [2, 0] = third row ("B"), first column ("Marks"). `.values` is a single
# float array for the whole frame, which is why the integer mark comes back as 75.0.
D.values[2,0]

75.0

## Add an extra column (Series) to the DataFrame

In [25]:
# Element-wise arithmetic on the whole column: divide every mark by 100, then
# multiply by 90. The result is stored on D itself as a new "ScaledMarks" column.
D["ScaledMarks"] = D["Marks"].div(100).mul(90)

In [26]:
D

Unnamed: 0,Marks,Grades,ScaledMarks
A,85,4.0,76.5
A-,80,3.5,72.0
B,75,3.0,67.5
B-,70,2.5,63.0


## Delete column

In [27]:
# Removes the column from D in place; D.drop(columns="ScaledMarks") would instead
# return a new frame and leave D untouched.
del D["ScaledMarks"]

In [28]:
D

Unnamed: 0,Marks,Grades
A,85,4.0
A-,80,3.5
B,75,3.0
B-,70,2.5


## Filter Rows

In [29]:
# Boolean-mask selection: keep only the rows whose mark is strictly greater than 75.
D.loc[D["Marks"] > 75]

Unnamed: 0,Marks,Grades
A,85,4.0
A-,80,3.5


## Missing Values

In [39]:
# Each dict is one row; a key that is absent from a row shows up as NaN in that row.
# NOTE(review): this rebinds `A`, which held a Series earlier in the notebook.
rows = [
    {"a": 1, "b": 4},
    {"b": -3, "c": 9},
]
A = pd.DataFrame(rows)
A

Unnamed: 0,a,b,c
0,1.0,4,
1,,-3,9.0


In [40]:
# Replace every missing value with 0. fillna returns a new frame; the result is
# only displayed here, not assigned back to A.
A.fillna(0)

Unnamed: 0,a,b,c
0,1.0,4,0.0
1,0.0,-3,9.0


In [42]:
# Drop every row that contains at least one missing value. dropna() returns a new
# frame (A is not modified); with this data both rows contain a NaN, so the
# displayed result has no rows left.
A.dropna()