In [2]:
import numpy as np
import pandas as pd
from numpy.random import default_rng

In [3]:
rng = default_rng(12345)


In [4]:
sales = rng.integers(0,1000,10)

In [5]:
sales

array([699, 227, 788, 316, 204, 797, 642, 676, 988, 391], dtype=int64)

In [6]:
sales_series = pd.Series(sales,name='Sales')
sales_series

0    699
1    227
2    788
3    316
4    204
5    797
6    642
7    676
8    988
9    391
Name: Sales, dtype: int64

In [7]:
sales_series.values

array([699, 227, 788, 316, 204, 797, 642, 676, 988, 391], dtype=int64)

In [8]:
array = np.arange(5)
pd.Series(array)

0    0
1    1
2    2
3    3
4    4
dtype: int32

In [9]:
sales_series.mean()

572.8

In [10]:
sales_series

0    699
1    227
2    788
3    316
4    204
5    797
6    642
7    676
8    988
9    391
Name: Sales, dtype: int64

In [11]:
sales_series.index

RangeIndex(start=0, stop=10, step=1)

In [12]:
sales_series.name = 'New Sales'

In [13]:
sales_series

0    699
1    227
2    788
3    316
4    204
5    797
6    642
7    676
8    988
9    391
Name: New Sales, dtype: int64

In [14]:
sales_series.astype('float')

0    699.0
1    227.0
2    788.0
3    316.0
4    204.0
5    797.0
6    642.0
7    676.0
8    988.0
9    391.0
Name: New Sales, dtype: float64

In [15]:
sales_series[1] = 0

In [16]:
sales_series.astype('bool')


0     True
1    False
2     True
3     True
4     True
5     True
6     True
7     True
8     True
9     True
Name: New Sales, dtype: bool

In [17]:
sales = [0,5,155,0,518]
sales_series = pd.Series(sales,name='Sales')
sales_series

0      0
1      5
2    155
3      0
4    518
Name: Sales, dtype: int64

In [18]:
items = ['coffe','bananas','tea','coconut','sugar']

In [19]:
sales_series = pd.Series(sales,index=items, name='Sales')

In [20]:
sales_series.index = items

In [21]:
sales_series

coffe        0
bananas      5
tea        155
coconut      0
sugar      518
Name: Sales, dtype: int64

In [22]:
sales_series['coffe':'tea']

coffe        0
bananas      5
tea        155
Name: Sales, dtype: int64

In [23]:
my_series = pd.Series(range(5))

In [24]:
my_series

0    0
1    1
2    2
3    3
4    4
dtype: int64

In [25]:
my_series[3]

3

In [26]:
my_series[0:2]

0    0
1    1
dtype: int64

In [27]:
sales_series.index = ['Day 0','Day 1','Day 2','Day 3','Day 4']

In [28]:
sales_series.iloc[3]

0

In [29]:
sales_series.iloc[0:3]

Day 0      0
Day 1      5
Day 2    155
Name: Sales, dtype: int64

In [30]:
sales_series.iloc[[1,3]]

Day 1    5
Day 3    0
Name: Sales, dtype: int64

In [31]:
sales_series.loc['Day 2']

155

In [32]:
sales_series.loc['Day 1':]

Day 1      5
Day 2    155
Day 3      0
Day 4    518
Name: Sales, dtype: int64

In [33]:
sales_series.iloc[[1,4]]

Day 1      5
Day 4    518
Name: Sales, dtype: int64

In [34]:
sales_series.reset_index(drop=True) #drop current index

0      0
1      5
2    155
3      0
4    518
Name: Sales, dtype: int64

In [35]:
sales_series.reset_index()          # returns a DataFrame: the old labels become an 'index' column beside a fresh 0..n-1 index

Unnamed: 0,index,Sales
0,Day 0,0
1,Day 1,5
2,Day 2,155
3,Day 3,0
4,Day 4,518


In [36]:
sales_series

Day 0      0
Day 1      5
Day 2    155
Day 3      0
Day 4    518
Name: Sales, dtype: int64

In [37]:
sales_series.reset_index(drop=True)

0      0
1      5
2    155
3      0
4    518
Name: Sales, dtype: int64

In [38]:
ages = pd.Series(
    [25, 30, 35, 40, 45, 25, 30], 
    index=['Alice', 'Bob', 'Charlie', 'David', 'Eve', 'Alice', 'Bob']
)

In [39]:
ages.reset_index(drop=True)

0    25
1    30
2    35
3    40
4    45
5    25
6    30
dtype: int64

In [40]:
sales_series

Day 0      0
Day 1      5
Day 2    155
Day 3      0
Day 4    518
Name: Sales, dtype: int64

In [41]:
index_array = np.array(['coffe','coffe','tea','coconut','sugar'])

In [42]:
sales_series.index = index_array

In [43]:
sales_series

coffe        0
coffe        5
tea        155
coconut      0
sugar      518
Name: Sales, dtype: int64

In [44]:
sales_series.loc[sales_series > 0]

coffe      5
tea      155
sugar    518
Name: Sales, dtype: int64

In [45]:
mask = (sales_series > 0) & (sales_series.index == 'coffe')

sales_series.loc[mask]

coffe    5
Name: Sales, dtype: int64

In [46]:
sales_series == 5

coffe      False
coffe       True
tea        False
coconut    False
sugar      False
Name: Sales, dtype: bool

In [47]:
sales_series.eq(5)

coffe      False
coffe       True
tea        False
coconut    False
sugar      False
Name: Sales, dtype: bool

In [48]:
my_series.index = ['Day 0','Day 1','Day 2','Day 3','Day 4']

In [49]:
my_series

Day 0    0
Day 1    1
Day 2    2
Day 3    3
Day 4    4
dtype: int64

In [50]:
my_series == 2

Day 0    False
Day 1    False
Day 2     True
Day 3    False
Day 4    False
dtype: bool

In [51]:
my_series.loc[my_series == 2]

Day 2    2
dtype: int64

In [52]:
my_series.loc[~(my_series == 2)]


Day 0    0
Day 1    1
Day 3    3
Day 4    4
dtype: int64

In [53]:
my_series.loc[my_series.isin([1,2])]

Day 1    1
Day 2    2
dtype: int64

In [54]:
my_series.loc[~my_series.isin([1,2])]


Day 0    0
Day 3    3
Day 4    4
dtype: int64

In [55]:
sales_series.sort_values(ascending=False) # returns a sorted copy; sales_series itself keeps its original order

sugar      518
tea        155
coffe        5
coffe        0
coconut      0
Name: Sales, dtype: int64

In [56]:
# Rebind the name to the sorted result instead of using inplace=True:
# same final order, but the cell stays an explicit assignment and is chain-friendly.
sales_series = sales_series.sort_values(ascending=False)

In [57]:
sales_series

sugar      518
tea        155
coffe        5
coffe        0
coconut      0
Name: Sales, dtype: int64

In [58]:
sales_series.sort_index()


coconut      0
coffe        5
coffe        0
sugar      518
tea        155
Name: Sales, dtype: int64

In [59]:
ages = pd.Series(
    [42, 37, 29, 52, 26, 42, 37], 
    index=['Beth', 'Fred', 'Max', 'Janet', 'Liz', 'Juan', 'Xiu']
)

In [60]:
ages.sort_values()

Liz      26
Max      29
Fred     37
Xiu      37
Beth     42
Juan     42
Janet    52
dtype: int64

In [61]:
# Rebind to the label-sorted result rather than mutating with inplace=True.
sales_series = sales_series.sort_index()

In [62]:
sales_series

coconut      0
coffe        5
coffe        0
sugar      518
tea        155
Name: Sales, dtype: int64

In [63]:
# `+ 2` and `.add(2)` are two spellings of the same element-wise addition.
# Only the last expression of a cell is displayed, so the first line's result is computed and discarded.
sales_series + 2
sales_series.add(2) 

coconut      2
coffe        7
coffe        2
sugar      520
tea        157
Name: Sales, dtype: int64

In [64]:
"$" + sales_series.astype('float').astype('string')

coconut      $0.0
coffe        $5.0
coffe        $0.0
sugar      $518.0
tea        $155.0
Name: Sales, dtype: string

In [65]:
# Make the int -> float conversion explicit (NaN cannot live in an int64 Series),
# then write by position with .iloc: the index holds string labels, so an integer
# key in plain [] only works through the deprecated positional fallback.
# `np.nan` is the canonical spelling; the `np.NaN` alias was removed in NumPy 2.0.
sales_series = sales_series.astype('float')
sales_series.iloc[1] = np.nan

In [66]:
sales_series

coconut      0.0
coffe        NaN
coffe        0.0
sugar      518.0
tea        155.0
Name: Sales, dtype: float64

In [67]:
sales_series.add(1,fill_value=0)

coconut      1.0
coffe        1.0
coffe        1.0
sugar      519.0
tea        156.0
Name: Sales, dtype: float64

In [68]:
sales_series

coconut      0.0
coffe        NaN
coffe        0.0
sugar      518.0
tea        155.0
Name: Sales, dtype: float64

In [69]:
my_series

Day 0    0
Day 1    1
Day 2    2
Day 3    3
Day 4    4
dtype: int64

In [70]:
my_series + sales_series # all NaN because the two indexes share no labels: addition aligns on index, and nothing matches

Day 0     NaN
Day 1     NaN
Day 2     NaN
Day 3     NaN
Day 4     NaN
coconut   NaN
coffe     NaN
coffe     NaN
sugar     NaN
tea       NaN
dtype: float64

In [71]:
my_series2 = my_series + 5 

In [72]:
my_series + my_series2

Day 0     5
Day 1     7
Day 2     9
Day 3    11
Day 4    13
dtype: int64

In [73]:
string_series = pd.Series(['Day 0','Day 1','Day 2','Day 3','Day 4',])

In [74]:
string_series

0    Day 0
1    Day 1
2    Day 2
3    Day 3
4    Day 4
dtype: object

In [75]:
string_series.str.upper().str.contains('DAY 1')

0    False
1     True
2    False
3    False
4    False
dtype: bool

In [76]:
# str.strip('Day ') treats its argument as a SET of characters ('D', 'a', 'y', ' ')
# to trim from both ends, so it only removes the prefix here by coincidence.
# An anchored replace removes exactly the leading 'Day ' and nothing else.
string_series.str.replace(r'^Day ', '', regex=True)

0    0
1    1
2    2
3    3
4    4
dtype: object

In [77]:
string_series.str[0]


0    D
1    D
2    D
3    D
4    D
dtype: object

In [78]:
string_series.str[-1].astype('int')


0    0
1    1
2    2
3    3
4    4
dtype: int32

In [79]:
string_series.str.split(' ')

0    [Day, 0]
1    [Day, 1]
2    [Day, 2]
3    [Day, 3]
4    [Day, 4]
dtype: object

In [80]:
string_series.str.split(' ',expand=True)


Unnamed: 0,0,1
0,Day,0
1,Day,1
2,Day,2
3,Day,3
4,Day,4


In [81]:
age_data = pd.Series(['Adult 25', 'Child 12', 'Adult 64', 'Teen 17', 'Adult 45'])

In [82]:
age_data

0    Adult 25
1    Child 12
2    Adult 64
3     Teen 17
4    Adult 45
dtype: object

In [83]:
split_data = age_data.str.split(' ', expand=True)

In [84]:
split_data[1] = split_data[1].astype('int')

In [85]:
split_data

Unnamed: 0,0,1
0,Adult,25
1,Child,12
2,Adult,64
3,Teen,17
4,Adult,45


In [86]:
split_data.loc[:,[1]]

Unnamed: 0,1
0,25
1,12
2,64
3,17
4,45


In [87]:
sales_series

coconut      0.0
coffe        NaN
coffe        0.0
sugar      518.0
tea        155.0
Name: Sales, dtype: float64

In [88]:
sales_series.sum()

673.0

In [89]:
sales_series.quantile([0.25,0.50,0.75])

0.25      0.00
0.50     77.50
0.75    245.75
Name: Sales, dtype: float64

In [90]:
 transactions = pd.read_csv("../data-analysis-pandas/retail/transactions.csv")

In [91]:
transactions_series = pd.Series(transactions['transactions'])

In [92]:
transactions_series.iloc[:5]

0     770
1    2111
2    2358
3    3487
4    1922
Name: transactions, dtype: int64

In [93]:
transactions_series.sum()

141478945

In [94]:
string_series[1] = 'Day 0'

In [95]:
string_series

0    Day 0
1    Day 0
2    Day 2
3    Day 3
4    Day 4
dtype: object

In [96]:
string_series.value_counts()

Day 0    2
Day 2    1
Day 3    1
Day 4    1
dtype: int64

In [97]:
string_series.value_counts(normalize=True) #percentage value


Day 0    0.4
Day 2    0.2
Day 3    0.2
Day 4    0.2
dtype: float64

In [98]:
string_series.unique()

array(['Day 0', 'Day 2', 'Day 3', 'Day 4'], dtype=object)

In [99]:
string_series.nunique()

4

In [100]:
age_groups = pd.Series([
    'Adult', 'Child', 'Adult', 'Teen', 'Adult', 
    'Child', 'Teen', 'Adult', 'Adult', 'Adult'
])

In [101]:
age_groups

0    Adult
1    Child
2    Adult
3     Teen
4    Adult
5    Child
6     Teen
7    Adult
8    Adult
9    Adult
dtype: object

In [102]:
age_groups.value_counts(normalize=True)

Adult    0.6
Child    0.2
Teen     0.2
dtype: float64

In [103]:
sales_series

coconut      0.0
coffe        NaN
coffe        0.0
sugar      518.0
tea        155.0
Name: Sales, dtype: float64

In [104]:
sales_series

coconut      0.0
coffe        NaN
coffe        0.0
sugar      518.0
tea        155.0
Name: Sales, dtype: float64

In [105]:
# Keep only the non-missing 'coffe' rows; notna() is the direct form of `isna() == False`.
mask = sales_series.notna() & (sales_series.index == 'coffe')
sales_series.loc[mask]

coffe    0.0
Name: Sales, dtype: float64

In [106]:
sales_series.isna().sum() # identify missing values

1

In [107]:
sales_series.value_counts()

0.0      2
518.0    1
155.0    1
Name: Sales, dtype: int64

In [108]:
sales_series.value_counts(dropna=False)


0.0      2
NaN      1
518.0    1
155.0    1
Name: Sales, dtype: int64

In [109]:
sales_series.dropna()

coconut      0.0
coffe        0.0
sugar      518.0
tea        155.0
Name: Sales, dtype: float64

In [110]:
sales_series.fillna(1)

coconut      0.0
coffe        1.0
coffe        0.0
sugar      518.0
tea        155.0
Name: Sales, dtype: float64

In [111]:
sales_series

coconut      0.0
coffe        NaN
coffe        0.0
sugar      518.0
tea        155.0
Name: Sales, dtype: float64

In [112]:
sales_series.fillna(sales_series.mean())

coconut      0.00
coffe      168.25
coffe        0.00
sugar      518.00
tea        155.00
Name: Sales, dtype: float64

In [113]:
nan_series = pd.Series([np.nan] * 5)

In [114]:
nan_series

0   NaN
1   NaN
2   NaN
3   NaN
4   NaN
dtype: float64

In [115]:
na_series = pd.Series([pd.NA] * 5)

In [116]:
na_series

0    <NA>
1    <NA>
2    <NA>
3    <NA>
4    <NA>
dtype: object

In [117]:
age_data = pd.Series([25, np.nan, 30, 45, 110, 37, np.nan, 42, np.nan, 52])

In [118]:
age_data

0     25.0
1      NaN
2     30.0
3     45.0
4    110.0
5     37.0
6      NaN
7     42.0
8      NaN
9     52.0
dtype: float64

In [119]:
missing_count = age_data.isna().sum()
median_age = age_data.fillna(age_data.median())

In [120]:
median_age

0     25.0
1     42.0
2     30.0
3     45.0
4    110.0
5     37.0
6     42.0
7     42.0
8     42.0
9     52.0
dtype: float64

In [121]:
def discount(price, threshold=20, rate=0.9):
    """Return the discounted price for items priced above ``threshold``.

    Parameters
    ----------
    price : number
        Original price.
    threshold : number, default 20
        Only prices strictly greater than this value are discounted.
    rate : float, default 0.9
        Multiplier applied to a discounted price (0.9 means 10% off).

    Returns
    -------
    number
        ``price * rate`` rounded to 2 decimals when ``price > threshold``;
        otherwise ``price`` unchanged. NaN passes through unchanged because
        ``NaN > threshold`` evaluates to False.
    """
    if price > threshold:
        return round(price * rate, 2)
    return price

In [122]:
sales_series

coconut      0.0
coffe        NaN
coffe        0.0
sugar      518.0
tea        155.0
Name: Sales, dtype: float64

In [123]:
sales_series.apply(discount)

coconut      0.0
coffe        NaN
coffe        0.0
sugar      466.2
tea        139.5
Name: Sales, dtype: float64

In [124]:
sales_series.apply(lambda x: round(x * 0.9,2) if x > 20 else x)

coconut      0.0
coffe        NaN
coffe        0.0
sugar      466.2
tea        139.5
Name: Sales, dtype: float64

In [125]:
string_series.apply(lambda x: x[:-2])

0    Day
1    Day
2    Day
3    Day
4    Day
dtype: object

In [126]:
def search(stringi, looking_for):
    """Report whether ``looking_for`` occurs in ``stringi``.

    Returns 'Found!' when the ``in`` membership check succeeds, 'Nope!' otherwise.
    """
    return 'Found!' if looking_for in stringi else 'Nope!'

In [127]:
# `args` must be a tuple of extra positional arguments. A bare string is unpacked
# character by character, so args='2' only works because it has a single character.
string_series.apply(search, args=('2',))

0     Nope!
1     Nope!
2    Found!
3     Nope!
4     Nope!
dtype: object

In [132]:
sales_series

coconut      0.0
coffe        NaN
coffe        0.0
sugar      518.0
tea        155.0
Name: Sales, dtype: float64

In [136]:
sales_series.where(sales_series == 518, 'false') #pandas: where(logical, when false)

coconut    false
coffe      false
coffe      false
sugar      518.0
tea        false
Name: Sales, dtype: object

In [138]:
sales_series.where(~(sales_series == 518), 'false') #invert

coconut      0.0
coffe        NaN
coffe        0.0
sugar      false
tea        155.0
Name: Sales, dtype: object

In [140]:
(
sales_series
.where(~(sales_series == 518), 'false')
.where(~(sales_series == 155), 'false')
)

coconut      0.0
coffe        NaN
coffe        0.0
sugar      false
tea        false
Name: Sales, dtype: object

In [141]:
# NOTE(review): this re-declares the `search` function already defined in an earlier cell, with the same logic.
def search(stringi, looking_for):
    """Return 'Found!' if ``looking_for`` is contained in ``stringi``, else 'Nope!'."""
    outcome = 'Nope!'
    if looking_for in stringi:
        outcome = 'Found!'
    return outcome

In [142]:
# Pass the extra positional argument as a one-element tuple; a plain string would be
# unpacked per character and breaks as soon as the search term is longer than one character.
string_series.apply(search, args=('2',))

0     Nope!
1     Nope!
2    Found!
3     Nope!
4     Nope!
dtype: object

In [144]:
string_series.where(string_series.str.contains('2'), 'Nope!')

0    Nope!
1    Nope!
2    Day 2
3    Nope!
4    Nope!
dtype: object

In [153]:
string_series.where(string_series.str.contains('2'), 'Nope!').where(~string_series.str.contains('2'), 'Found!')


0     Nope!
1     Nope!
2    Found!
3     Nope!
4     Nope!
dtype: object

In [155]:
pd.Series(np.where(string_series.str.contains('2'),'Found!','Nope!'))

0     Nope!
1     Nope!
2    Found!
3     Nope!
4     Nope!
dtype: object

In [156]:
ages = pd.Series([16, 21, 15, 25, 14, 19, 22, 17, 13, 20])

In [158]:
# mask() replaces values where the condition is True (the inverse of where()):
# under-18 entries become 'Teen', then the remaining 18+ entries become 'Adult'.
ages.mask(ages < 18, 'Teen').mask(ages >= 18, 'Adult')

0     Teen
1    Adult
2     Teen
3    Adult
4     Teen
5    Adult
6    Adult
7     Teen
8     Teen
9    Adult
dtype: object