In [2]:
import pandas as pd
import numpy as np

In [4]:
#  a Series may seem simple—much like a single column or row in a spreadsheet. 
#  A Pandas Series is a one-dimensional labeled array capable of holding any data type
# including integers, strings, floats, Python objects, and even mixed types
points = [30, 25, 15, 10, 20]
player_points_1 = pd.Series(points)
player_points_1

0    30
1    25
2    15
3    10
4    20
dtype: int64

In [None]:
# using list
player_points_2 = pd.Series(
    [30, 25, 15, 10, 20],  # Points scored by each player in a match
    index=["Player 1", "Player 2", "Player 3", "Player 4", "Player 5"]  # Player Names
)
player_points_2

Player 1    30
Player 2    25
Player 3    15
Player 4    10
Player 5    20
dtype: int64

In [7]:
# using dictionary
data_dictionary = {'Player 1': 30, 'Player 2': 25, 'Player 3': 15, 'Player 4': 10, 'Player 5': 20}
player_points_3 = pd.Series(data_dictionary)
player_points_3

Player 1    30
Player 2    25
Player 3    15
Player 4    10
Player 5    20
dtype: int64

In [None]:
# using numpy
data_numpy = np.array([30, 25, 15, 10, 20])
series_numpy = pd.Series(data_numpy)
series_numpy

0    30
1    25
2    15
3    10
4    20
dtype: int64

In [10]:
# multiple data types
multiple_data_types = [10, 'Hello', 3.14, True]
series_multiple_data_types = pd.Series(multiple_data_types)
series_multiple_data_types


0       10
1    Hello
2     3.14
3     True
dtype: object

In [11]:
empty_series = pd.Series()

In [12]:
points_4 = [32, 18, 27, 12, 24, 24, 24, 29, 10, 19,
          30, 22, 13, 28, 17, 25, 14, 27, 16, 26,
          11, 20, 31, 27, 27, 19, 12, 24, 30, 14]
player_points_4 = pd.Series(points_4)
player_points_4.head()

0    32
1    18
2    27
3    12
4    24
dtype: int64

In [13]:
player_points_4.tail()

25    19
26    12
27    24
28    30
29    14
dtype: int64

In [14]:
# accessing elements in a series using iloc
player_points_4.iloc[5]

np.int64(24)

In [15]:
# using loc
player_points_2['Player 1']

np.int64(30)

In [16]:
# add new data in series 
player_points_4[30] = 22

In [17]:
player_points_4.tail()

26    12
27    24
28    30
29    14
30    22
dtype: int64

In [None]:
# number of value counts 
# Value counts allow you to see how often a value appears within your series.
player_points_4.value_counts().head(5)

27    4
24    4
19    2
12    2
14    2
Name: count, dtype: int64

In [19]:
# Count will tell us the amount of non null values in our series.
player_points_4.count()

np.int64(31)

In [20]:
# If you want to see the number of values including null ones use size.
player_points_4.size

31

In [21]:
# nunique shows us the number of unique values
player_points_4.nunique()

20

In [22]:
# If you want to see if every value is different use .is_unique
player_points_4.is_unique

False

In [23]:
player_points_4.dtype

dtype('int64')

In [24]:
# data type conversion
# You can also convert the datatypes within a series. Down below we change an int series to float
player_points_4 = player_points_4.astype("float64")
player_points_4.dtype

dtype('float64')

In [26]:
# To update values across a series, we have to generate a new series. 
player_points_updated = player_points_4 + 5
player_points_updated

0     37.0
1     23.0
2     32.0
3     17.0
4     29.0
5     29.0
6     29.0
7     34.0
8     15.0
9     24.0
10    35.0
11    27.0
12    18.0
13    33.0
14    22.0
15    30.0
16    19.0
17    32.0
18    21.0
19    31.0
20    16.0
21    25.0
22    36.0
23    32.0
24    32.0
25    24.0
26    17.0
27    29.0
28    35.0
29    19.0
30    27.0
dtype: float64

In [27]:
# sum two series dataframes
runs1 = pd.Series([3, 5, 4])
runs2 = pd.Series([11, 3, 4])
runs_3 = runs1 + runs2
runs_3

0    14
1     8
2     8
dtype: int64

In [28]:
# Descriptive statistics
print("Mean:", player_points_4.mean())
print("Median:", player_points_4.median())
print("Standard Deviation:", player_points_4.std())
print("Sum:", player_points_4.sum())
print("Min:", player_points_4.min())
print("Max:", player_points_4.max())

Mean: 21.741935483870968
Median: 24.0
Standard Deviation: 6.597816517280465
Sum: 674.0
Min: 10.0
Max: 32.0


In [29]:
# Sort by index
player_points_updated.sort_index(inplace=True)

In [30]:
player_points_updated.sort_index(ascending=False, inplace=True)

In [31]:
# Sort by Values
player_points_updated.sort_values(inplace=True)

In [32]:
player_points_updated.sort_values(ascending=False, inplace=True)

In [33]:
# Element Wise Comparrison
player_points_updated > 30

0      True
22     True
10     True
28     True
7      True
13     True
24     True
23     True
17     True
2      True
19     True
15    False
6     False
4     False
27    False
5     False
11    False
30    False
21    False
25    False
9     False
1     False
14    False
18    False
29    False
16    False
12    False
3     False
26    False
20    False
8     False
dtype: bool

In [34]:
player_points_updated.loc[player_points_updated > 30]

0     37.0
22    36.0
10    35.0
28    35.0
7     34.0
13    33.0
24    32.0
23    32.0
17    32.0
2     32.0
19    31.0
dtype: float64

In [35]:
# Check for Null Values
null_series = pd.Series([25, 10, 5, np.nan, 8, 41, np.nan])
null_series.isna()

0    False
1    False
2    False
3     True
4    False
5    False
6     True
dtype: bool

In [36]:
#  Remove Null Values
null_series_removed = null_series.dropna()

In [37]:
# Fill Null Values
null_series_filled = null_series.fillna(0)

In [38]:
null_series_filled_2 = null_series.fillna(null_series.median())

In [40]:
# Apply allows you to create a new series based off of a function
def points_times_two(x):
    return x * 2

player_points_updated_applied = player_points_updated.apply(points_times_two)

player_points_updated.head()

0     37.0
22    36.0
10    35.0
28    35.0
7     34.0
dtype: float64

In [42]:
# Turn dataframe column into a series
score_dictionary = {'Player A': [10, 12, 23], 'Player B': [14, 16, 16], 'Player C': [17, 22, 29]}
df = pd.DataFrame(score_dictionary)
df

Unnamed: 0,Player A,Player B,Player C
0,10,14,17
1,12,16,22
2,23,16,29


In [45]:
series_from_df_1 = df['Player A'].squeeze()
series_from_df_1

0    10
1    12
2    23
Name: Player A, dtype: int64

In [46]:
series_from_df_2 = df['Player B'].squeeze()
series_from_df_2

0    14
1    16
2    16
Name: Player B, dtype: int64

In [49]:
# Create a DataFrame from two Series
df_2 = pd.concat([series_from_df_1, series_from_df_2], axis=1)
df_2

Unnamed: 0,Player A,Player B
0,10,14
1,12,16
2,23,16


In [50]:
series_from_df_1.to_list()

[10, 12, 23]