In [1]:
import numpy as np
import pandas as pd

# Introduction to pandas Data Structures
To get started with pandas, you will need to get comfortable with its two workhorse
data structures: **Series and DataFrame**. While they are not a universal solution for
every problem, they provide a solid, easy-to-use basis for most applications.

# Pandas Series Object

<b>A Series</b> is the primary building block of pandas.

Series represents a `one-dimensional` `labeled indexed array` based on the NumPy ndarray.

Like an array, a Series can hold zero or more values of any single data type.

A labeled index array means that we can assign index labels of our own choosing.

# Creating Series
A Series can be created and initialized by passing either a <b>scalar value,
a NumPy ndarray, a Python list, or a Python dict</b> as the `data` parameter of
the Series constructor. `data` is the first parameter, so its name does not need to
be written out when the data is passed as the first argument.

In [3]:
# Build a one-element Series from a scalar.
# In the displayed result, 0 is the index label assigned to the value 2.
s1 = pd.Series(data=2)
s1

0    2
dtype: int64

In [5]:
# Build a multi-item Series from a Python list.
# No index is passed, so the labels shown on the left are the defaults pandas assigns.
s2 = pd.Series(data=[1, 2, 3, 4, 5])
s2

0    1
1    2
2    3
3    4
4    5
dtype: int64

In [6]:
# Get the values in the series
# To pull only the values out of a Series (without the index), use its .values property

s2.values

array([1, 2, 3, 4, 5], dtype=int64)

In [7]:
# Get the index (the labels) of the series; s2 was built without an explicit index
s2.index

RangeIndex(start=0, stop=5, step=1)

In [9]:
# Explicitly create a Series together with its index.
# The index labels are strings here, not integers.
# Supplying custom labels does not remove positional access: every element can
# still be reached by its zero-based position as well as by its label.
s3 = pd.Series(data=[1, 2, 3], index=list('abc'))
s3

a    1
b    2
c    3
dtype: int64

In [11]:
# Look up the same element twice: by label with [] and by zero-based position.
# The positional lookup goes through .iloc because s3 has string labels, and plain
# s3[2] would rely on the integer-position fallback of [] that pandas has deprecated.
# The printed text is left unchanged so that it still matches the saved output.
print(f"value by label 's3['c']' is {s3['c']} and value by index 's3[2]' is {s3.iloc[2]}")
# access both by label and by position

value by label 's3['c']' is 3 and value by index 's3[2]' is 3


In [13]:
# Create a series that reuses an existing index (the index of s2)
# Five values are passed as a list, one for each label in s2.index
s4 = pd.Series(["A","B","C","D","E"], index=s2.index)
s4

0    A
1    B
2    C
3    D
4    E
dtype: object

In [14]:
# Create a Series from a dict.
# The dict keys become the index labels and the dict values become the data.
s4 = pd.Series(dict(a=1, b=2, c=3, d=4))

s4

a    1
b    2
c    3
d    4
dtype: int64

In [15]:
# A NumPy array can be passed as the data as well.

s5 = pd.Series(data=np.array([22, 23, 44, 55, 66]))
s5

0    22
1    23
2    44
3    55
4    66
dtype: int32

<h3>Size, shape, uniqueness, and counts of values</h3>

In [19]:
# Example Series whose last entry is NaN, i.e. a missing (empty) value.
s = pd.Series(data=[0, 1, 1, 2, 3, 4, 4, 5, 6, 7, np.nan])
s

0     0.0
1     1.0
2     1.0
3     2.0
4     3.0
5     4.0
6     4.0
7     5.0
8     6.0
9     7.0
10    NaN
dtype: float64

In [20]:
# Size, shape, uniqueness and frequency summaries of s, printed one per line.
print(
    len(s),             # total number of elements
    s.size,             # number of elements
    s.shape,            # shape tuple
    s.count(),          # number of non-null values
    s.unique(),         # the distinct values
    s.value_counts(),   # how many times each value occurs
    sep="\n",
)

11
11
(11,)
10
[ 0.  1.  2.  3.  4.  5.  6.  7. nan]
4.0    2
1.0    2
7.0    1
6.0    1
5.0    1
3.0    1
2.0    1
0.0    1
dtype: int64


<h3>Peeking at data with heads, tails, and take</h3>

In [22]:
# First five rows (the default for head())
# When the data is very large, head() lets us pick up just the first 5 rows
s.head()

0    0.0
1    1.0
2    1.0
3    2.0
4    3.0
dtype: float64

In [24]:
# First three rows (the number of rows can also be specified)

# s.head(3) is equivalent to the keyword form below
s.head(n = 3)

0    0.0
1    1.0
2    1.0
dtype: float64

In [26]:
# Last five rows (the default for tail())
s.tail()

6     4.0
7     5.0
8     6.0
9     7.0
10    NaN
dtype: float64

In [27]:
# Last three rows (the number of rows can also be specified)

# s.tail(3) is equivalent to the keyword form below
s.tail(n =3)

8     6.0
9     7.0
10    NaN
dtype: float64

In [28]:
# The .take() method returns the rows of a Series at the given zero-based positions.

# Take only specific items, much like fancy indexing; a position may be repeated (9 is requested twice)

s.take([9,3,9])

9    7.0
3    2.0
9    7.0
dtype: float64

# Looking up values in Series

In [29]:
# Single item lookup by label: print the whole Series, then fetch the value stored under label 'a'

print(s3)
s3['a']

a    1
b    2
c    3
dtype: int64


1

In [35]:
# Zero-based positional lookup on a Series whose index labels are strings.
# .iloc states the positional intent explicitly; plain s3[1] would depend on the
# integer-position fallback of [] that pandas has deprecated.
s3.iloc[1]

# When the index labels supplied are themselves integers, [] does not index by
# position: it looks the key up among the labels instead.

2

In [36]:
# Multiple items: pass a list of labels; the result follows the order of that list
s3[['c', 'a']]

c    3
a    1
dtype: int64

In [37]:
# A Series whose index labels are integers that do not start at 0.
# Question to explore next: with these integer labels, is an integer key passed
# to [] treated as a position or as a label?
s5 = pd.Series(data=[1, 2, 3], index=[2, 3, 4])
s5

2    1
3    2
4    3
dtype: int64

# label-based lookup versus position-based lookup

In [38]:
s5[2]  # 2 is treated as a label-based lookup,
       # because the index contains the label 2

1

In [39]:
# With an integer index, [] looks the key up as a label, so positional lookup does not work.
# s5 has no label 0, so this raises KeyError; the error is caught and printed so that
# the demonstration does not stop a Restart & Run All.
try:
    s5[0]
except KeyError as err:
    print(f"KeyError: {err}")

KeyError: 0

In [40]:
s5.loc[2]  # .loc also performs a label-based lookup: this fetches the value stored under label 2

1

In [41]:
# Explicitly ask for the integer (positional) location
s5.iloc[2]  # .iloc always looks up by zero-based position, even when the index labels are integers

3

In [42]:
# Multiple items by label (loc); the result follows the order of the requested labels
s5.loc[[4,3]]

4    3
3    2
dtype: int64

In [46]:
# A list passed to [] on this integer-indexed Series is looked up by label as well.
# Label 0 does not exist in s5, so pandas raises KeyError; the error is caught and
# printed so that the demonstration does not stop a Restart & Run All.
try:
    s5[[0,2]]
except KeyError as err:
    print(f"KeyError: {err}")

KeyError: 'Passing list-likes to .loc or [] with any missing labels is no longer supported, see https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike'

In [47]:
# Multiple items by position (iloc): positions 0 and 2 are the first and third elements
s5.iloc[[0,2]]

2    1
4    3
dtype: int64

In [48]:
# s5 has only three elements (positions 0-2), so position 3 is out of bounds and
# .iloc raises IndexError; the error is caught and printed so that the
# demonstration does not stop a Restart & Run All.
try:
    s5.iloc[[0,2,3]]
except IndexError as err:
    print(f"IndexError: {err}")

IndexError: positional indexers are out-of-bounds

# Alignment via index labels
* The word "alignment" is used when two things are matched up with each other so that they line up.

In [49]:
# First operand for the alignment demo: labels a-d in ascending order.
s6 = pd.Series(data=[1, 2, 3, 4], index=list('abcd'))
s6

a    1
b    2
c    3
d    4
dtype: int64

In [50]:
# Second operand for the alignment demo: the same labels, listed in reverse order.
s7 = pd.Series(data=[4, 3, 2, 1], index=list('dcba'))
s7

d    4
c    3
b    2
a    1
dtype: int64

In [52]:
# Add them
# Each value is added to the value carrying the same label, not position by position as NumPy would; but what if a label is missing?
s6 + s7    # it first aligns the data by label, then performs the operation

a    2
b    4
c    6
d    8
dtype: int64

<h3>-NaN + number = NaN </h3>      (NaN added to a number results in NaN)
    
<h3>-number + NaN = NaN</h3>        (a number added to NaN results in NaN)

In [53]:
# Left operand for the missing-label demo: labels a-d.
s8 = pd.Series(dict(a=1, b=2, c=3, d=5))

s8

a    1
b    2
c    3
d    5
dtype: int64

In [55]:
# Right operand for the missing-label demo: labels b-e.
s9 = pd.Series(dict(b=6, c=7, d=9, e=10))

s9

b     6
c     7
d     9
e    10
dtype: int64

Here 'a' has no matching 'a', 'b' matches 'b', 'c' matches 'c', 'd' matches 'd', and 'e' has no matching 'e'.

* To align the two Series, pandas makes the labels on both sides equal. The second Series has no 'a', so 'a' is created there; the first Series has no 'e', so 'e' is created there. Both sides then carry the labels a, b, c, d, e, i.e. every label exists on both sides. So what value do the newly added 'a' and 'e' get? The answer is `NaN`: any label that had to be added because it was missing gets the value `NaN`.

In [56]:
# The result is NaN for 'a' and 'e', the labels that exist in only one of the two operands
# demonstrates alignment
s8 + s9

a     NaN
b     8.0
c    10.0
d    14.0
e     NaN
dtype: float64

In [57]:
# A Series whose index contains a duplicate label ('a' appears twice).
s10 = pd.Series(data=[1.0, 2.0, 3.0], index=list('aab'))
s10

a    1.0
a    2.0
b    3.0
dtype: float64

In [58]:
# A second Series with the duplicate label 'a', plus a label 'c'.
s11 = pd.Series(data=[4.0, 5.0, 6.0], index=list('aac'))
s11

a    4.0
a    5.0
c    6.0
dtype: float64