<a href="https://colab.research.google.com/github/sayaliph18/Python-Pandas/blob/main/Pandas_Series_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
import pandas as pd
import numpy as np

## **Series**

In [8]:
# Build a Series from a NumPy array holding the integers 1 through 10.
n = np.arange(10) + 1
pd.Series(data=n)

0     1
1     2
2     3
3     4
4     5
5     6
6     7
7     8
8     9
9    10
dtype: int64

In [9]:
# Create a Series from a dict
pd.Series({'A':1,'B':2,'C':3})   # the dict keys become the index labels, the dict values become the data

A    1
B    2
C    3
dtype: int64

In [10]:
# Create a Series from a scalar value.
# With no index given, the result holds that single value at label 0.
pd.Series('Pune')

0    Pune
dtype: object

In [11]:
# When an index is supplied, the scalar is repeated for every label in it (here 0..4).
pd.Series('Puna',range(0,5))

0    Puna
1    Puna
2    Puna
3    Puna
4    Puna
dtype: object

## Methods related to Series

In [12]:
# Fixed sample data for the drop()/drop_duplicates() examples.
# These values were originally a one-off draw from np.random.randint(2, 12, 10);
# an unseeded draw changes on every run, so the outputs shown in the following
# cells could not be reproduced after a kernel restart. Pinning the values
# keeps the notebook reproducible.
s = pd.Series([6, 6, 4, 11, 10, 5, 6, 9, 10, 3])
s

0     6
1     6
2     4
3    11
4    10
5     5
6     6
7     9
8    10
9     3
dtype: int64

# **drop()**: temporary by default (returns a new Series)

In [13]:
# drop() removes elements of a Series by index label and returns a new Series.
# Pass the label(s) of the rows to remove via index=.
s.drop(index=[7,1,5])

0     6
2     4
3    11
4    10
6     6
8    10
9     3
dtype: int64

In [14]:
# s still has all 10 elements: drop() returned a new Series and did not modify s.
s

0     6
1     6
2     4
3    11
4    10
5     5
6     6
7     9
8    10
9     3
dtype: int64

In [15]:
# The drop above was temporary: it returned a new Series and left s untouched.
# To make the removal permanent, rebind s to the result (preferred over
# inplace=True, which prevents chaining and offers no benefit here).
# errors='ignore' keeps this cell safe to re-run: once label 7 is gone,
# dropping it again would otherwise raise KeyError.
s = s.drop(index=7, errors='ignore')

In [16]:
# Label 7 is now missing from s itself.
s

0     6
1     6
2     4
3    11
4    10
5     5
6     6
8    10
9     3
dtype: int64

In [17]:
# Several labels can be dropped at once by passing a list
# (this returns a new Series; s is not modified).
s.drop(index=[0,2,5,8])

1     6
3    11
4    10
6     6
9     3
dtype: int64

In [18]:
# Drop the labels 4, 5 and 6: range(4,7) excludes its stop value.
# The values are matched against index labels, not positions.
s.drop(index=range(4,7))

0     6
1     6
2     4
3    11
8    10
9     3
dtype: int64

In [19]:
# s is unchanged by the two drops above; each of them returned a new Series.
s

0     6
1     6
2     4
3    11
4    10
5     5
6     6
8    10
9     3
dtype: int64

# **drop_duplicates()**

In [20]:
# drop_duplicates() returns a Series with repeated values removed.
s.drop_duplicates()  # default keep='first': the first occurrence of each value is kept

0     6
2     4
3    11
4    10
5     5
9     3
dtype: int64

In [21]:
# keep='last' keeps the last occurrence of each value instead of the first
s.drop_duplicates(keep='last')

2     4
3    11
5     5
6     6
8    10
9     3
dtype: int64

In [22]:
# keep=False drops every value that occurs more than once,
# leaving only the values that appear exactly once
s.drop_duplicates(keep=False)

2     4
3    11
5     5
9     3
dtype: int64

In [23]:
# Missing data: pandas represents missing/empty/null entries as NaN
# ("Not a Number"). numpy is already imported as np in the first cell,
# so it is not imported again here.
d = pd.Series([np.nan,100,230,np.nan])
d

0      NaN
1    100.0
2    230.0
3      NaN
dtype: float64

## **dropna()**
Use `dropna()` to remove NaN (missing) values.

Return a new Series with missing values removed.


In [24]:
# Returns a new Series without the NaN entries; d itself is not modified.
d.dropna()

1    100.0
2    230.0
dtype: float64

## **isna()**

Use `isna()` to check for missing data: it returns a boolean Series that is `True` wherever a value is missing.


In [25]:
# Boolean mask: True where the value is missing (NaN), False otherwise.
d.isna()

0     True
1    False
2    False
3     True
dtype: bool

## **isna().sum()**

Calculates the total number of NaN values.


In [26]:
# Summing the boolean mask counts its True entries, i.e. the number of NaN values.
d.isna().sum()

2

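## **map()**

`Series.map()` substitutes each value of a Series using a mapping (a dict or another Series) or a function, and returns a new Series of the same length. Values that have no entry in a dict mapping become NaN. It plays a role similar to Python's built-in `map()`, which applies a function to every item of an iterable (tuple, list, etc.).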

In [28]:
# Sample categorical data used by the map() and replace() examples.
nm = pd.Series(
    data=['Male', 'Female', 'Male', 'Female', 'Female'],
)
nm

0      Male
1    Female
2      Male
3    Female
4    Female
dtype: object

In [29]:
# Translate each value through the dict: 'Male' -> 0, 'Female' -> 1.
nm.map({'Male':0,'Female':1})

0    0
1    1
2    0
3    1
4    1
dtype: int64

In [33]:
# map() returned a new Series; nm still holds the original strings.
nm

0      Male
1    Female
2      Male
3    Female
4    Female
dtype: object

## **replace()**
Replace values given in `to_replace` with `value`.

In [31]:
# Show the documentation for Series.replace. help() is plain Python, whereas
# the `?` suffix only works inside IPython/Jupyter.
help(pd.Series.replace)

In [32]:
# Only 'Male' is replaced; values without an entry in the dict are left as they are.
nm.replace({'Male':0})

0         0
1    Female
2         0
3    Female
4    Female
dtype: object

In [35]:
# Replace both categories in a single call by giving one dict entry per value.
nm.replace({'Male':0,'Female':1})

0    0
1    1
2    0
3    1
4    1
dtype: int64

In [36]:
# String-based alternative via the .str accessor. The replacements are the
# strings '0' and '1', so the result keeps dtype object rather than int64.
nm.str.replace('Male','0').str.replace('Female','1')

0    0
1    1
2    0
3    1
4    1
dtype: object

# **nlargest()**

Return the largest `n` elements.

By default `n` is 5. The values are returned sorted in descending order.

In [39]:
# Show the documentation for Series.nlargest. help() is plain Python, whereas
# the `?` suffix only works inside IPython/Jupyter.
help(pd.Series.nlargest)

In [38]:
# Numeric sample data for the nlargest()/nsmallest(), conversion and
# head()/tail() examples. Note that this rebinds s to a new 12-element Series.
s = pd.Series(
    data=[12, 2, 4, 56, 77, 911, 0, 123, 245, 600, 10, 11],
)
s

0      12
1       2
2       4
3      56
4      77
5     911
6       0
7     123
8     245
9     600
10     10
11     11
dtype: int64

In [41]:
# nlargest() returns the largest values, sorted from high to low.
# With no argument it returns 5 values.
s.nlargest()

5    911
9    600
8    245
7    123
4     77
dtype: int64

In [42]:
# Total number of elements in s
s.size

12

In [43]:
# Pass n to choose how many values are returned (here only the top 3)
s.nlargest(3)

5    911
9    600
8    245
dtype: int64

## **nsmallest()**

Returns the values in ascending order (low to high).

Return the smallest n elements.

In [44]:
# The 5 smallest values by default, sorted from low to high.
s.nsmallest()

6      0
1      2
2      4
10    10
11    11
dtype: int64

In [45]:
# Only the 3 smallest values.
s.nsmallest(3)

6    0
1    2
2    4
dtype: int64

## **unique()**
Return unique values of Series object.


In [47]:
# Show the documentation for Series.unique. help() is plain Python, whereas
# the `?` suffix only works inside IPython/Jupyter.
help(pd.Series.unique)

In [48]:
# Sample data with repeated categories for unique(), nunique() and value_counts().
# list('ABCAAB') splits the string into its six one-character labels.
x = pd.Series(list('ABCAAB'))
x

0    A
1    B
2    C
3    A
4    A
5    B
dtype: object

In [49]:
# Distinct values of x, returned as a NumPy array.
x.unique()

array(['A', 'B', 'C'], dtype=object)

In [50]:
# The built-in set() also removes duplicates, but a set has no guaranteed order.
set(x)

{'A', 'B', 'C'}

# **nunique()**

To get the number of unique records, use `nunique()`.


In [51]:
# Same Series as before, shown again for reference.
x

0    A
1    B
2    C
3    A
4    A
5    B
dtype: object

In [52]:
# Number of distinct values in x.
x.nunique()

3

# **value_counts()**

`value_counts()` gives the count for each category (distinct value).

In [53]:
x.value_counts()  # number of occurrences of each distinct value

A    3
B    2
C    1
dtype: int64

# **Convert a Series into....**

In [54]:
# s is the 12-element numeric Series that the conversions below operate on.
s

0      12
1       2
2       4
3      56
4      77
5     911
6       0
7     123
8     245
9     600
10     10
11     11
dtype: int64

In [55]:
# Convert a Series to a NumPy array
s.to_numpy()

array([ 12,   2,   4,  56,  77, 911,   0, 123, 245, 600,  10,  11])

In [56]:
# The .values attribute gives the same array here.
s.values

array([ 12,   2,   4,  56,  77, 911,   0, 123, 245, 600,  10,  11])

In [58]:
# Third option: pass the Series straight to the NumPy array constructor.
# numpy is already imported as np in the first cell, so no import is needed here.
np.array(s)

array([ 12,   2,   4,  56,  77, 911,   0, 123, 245, 600,  10,  11])

In [59]:
# Write the Series to a CSV file; 'file.csv' is a relative path, so it is
# created in the current working directory.
s.to_csv('file.csv')

In [60]:
# Write only the values: no index column and no header row.
s.to_csv('file1.csv',index=False,header=False)

In [61]:
# Series to dict: the index labels become the keys, the data become the values
print(s.to_dict())

{0: 12, 1: 2, 2: 4, 3: 56, 4: 77, 5: 911, 6: 0, 7: 123, 8: 245, 9: 600, 10: 10, 11: 11}


In [62]:
# The built-in dict() produces the same label -> value mapping.
print(dict(s))

{0: 12, 1: 2, 2: 4, 3: 56, 4: 77, 5: 911, 6: 0, 7: 123, 8: 245, 9: 600, 10: 10, 11: 11}


In [63]:
# Convert a Series to a list (only the values are kept, not the index)
list(s)

[12, 2, 4, 56, 77, 911, 0, 123, 245, 600, 10, 11]

In [64]:
# Series method that gives the same list.
s.to_list()

[12, 2, 4, 56, 77, 911, 0, 123, 245, 600, 10, 11]

# **head()**

Displays the first 5 records by default.



In [65]:
# The full Series, for comparison with the head() and tail() results.
s

0      12
1       2
2       4
3      56
4      77
5     911
6       0
7     123
8     245
9     600
10     10
11     11
dtype: int64

In [66]:
# First 5 rows (the default).
s.head()

0    12
1     2
2     4
3    56
4    77
dtype: int64

In [67]:
# First 2 rows.
s.head(2)

0    12
1     2
dtype: int64

# **tail()**

Returns the last 5 records by default.

In [68]:
# Last 5 rows (the default).
s.tail()

7     123
8     245
9     600
10     10
11     11
dtype: int64

In [69]:
# Last 2 rows.
s.tail(2)

10    10
11    11
dtype: int64