In [1]:
import pandas as pd

In [2]:
# Read the WNBA statistics CSV into a DataFrame; the path is relative, so the
# file is looked up in the notebook's current working directory.
wnba = pd.read_csv('WNBA Stats.csv')

## 1. Frequency Distribution Tables

In [5]:
# Frequency distribution table for the playing position: one row per unique
# value, ordered from the most to the least common position.
freq_distro_pos = wnba.loc[:, 'Pos'].value_counts()
freq_distro_pos

G      60
F      33
C      25
G/F    13
F/C    12
Name: Pos, dtype: int64

In [6]:
# Frequency distribution table for player height: one row per distinct height,
# ordered from the most to the least common value.
freq_distro_height = wnba.loc[:, 'Height'].value_counts()
freq_distro_height

188    20
193    18
175    16
185    15
173    11
183    11
191    11
196     9
178     8
180     7
170     6
198     5
168     2
201     2
165     1
206     1
Name: Height, dtype: int64

## 2. Sorting Frequency Distribution Tables

In [8]:
# Age frequency table ordered by the ages themselves (youngest first) rather
# than by how often each age occurs.
(
    wnba['Age']
    .value_counts()
    .sort_index(ascending=True)
)

21     2
22    10
23    15
24    16
25    15
26    12
27    13
28    14
29     8
30     9
31     8
32     8
33     3
34     5
35     4
36     1
Name: Age, dtype: int64

In [9]:
# Age frequency table ordered by the ages themselves, oldest first.
(
    wnba['Age']
    .value_counts()
    .sort_index(ascending=False)
)

36     1
35     4
34     5
33     3
32     8
31     8
30     9
29     8
28    14
27    13
26    12
25    15
24    16
23    15
22    10
21     2
Name: Age, dtype: int64

## 3. Sorting Tables for Ordinal Variables

In [13]:
def make_pts_ordinal(row):
    """Label a row's point total on a six-level ordinal scale.

    Parameters
    ----------
    row : mapping
        Any object that supports ``row['PTS']`` (for example a DataFrame row
        handed over by ``DataFrame.apply(..., axis=1)``); the ``'PTS'`` entry
        is the point total to classify.

    Returns
    -------
    str or None
        One of, from the lowest to the highest class: ``'very few points'``
        (<= 20), ``'few points'`` (<= 80), ``'many, but below average'``
        (<= 150), ``'average number of points'`` (<= 300),
        ``'more than average'`` (<= 450) or ``'much more than average'``
        (anything above 450). ``None`` is returned when the point total is
        missing.
    """
    points = row['PTS']

    # Every comparison against NaN is False, so without this guard a missing
    # total would fall through all the bounds and be reported as the top class.
    if pd.isna(points):
        return None

    # Inclusive upper bound of each class in ascending order; the first bound
    # that is not exceeded decides the label, so no lower bound is needed.
    upper_bounds = (
        (20, 'very few points'),
        (80, 'few points'),
        (150, 'many, but below average'),
        (300, 'average number of points'),
        (450, 'more than average'),
    )
    for upper, label in upper_bounds:
        if points <= upper:
            return label
    return 'much more than average'

# Convert the PTS column from a ratio scale to an ordinal scale by labelling
# every row with make_pts_ordinal.
wnba['PTS_ordinal_scale'] = wnba.apply(make_pts_ordinal, axis = 1)

# Order the frequency table by the rank of the labels (highest class first).
# The labels are looked up by name instead of by the position value_counts()
# happens to give them: positions follow the counts, so any change in the data
# (or a tie between two counts) would silently scramble a positional ordering.
# fill_value = 0 keeps a class that has no players in the table with a zero.
pts_ordinal_desc = wnba['PTS_ordinal_scale'].value_counts().reindex(
    [
        'much more than average',
        'more than average',
        'average number of points',
        'many, but below average',
        'few points',
        'very few points',
    ],
    fill_value = 0,
) #descending
pts_ordinal_desc

much more than average      13
more than average           21
average number of points    45
many, but below average     25
few points                  27
very few points             12
Name: PTS_ordinal_scale, dtype: int64

## 4. Proportions and Percentages

In [14]:
# Absolute frequencies of each position, printed as plain text.
print(wnba.loc[:, 'Pos'].value_counts())

G      60
F      33
C      25
G/F    13
F/C    12
Name: Pos, dtype: int64


In [15]:
# Proportions computed by hand: each position's count divided by the number
# of rows in the data set.
print(
    wnba['Pos']
    .value_counts()
    .div(len(wnba))
)

G      0.419580
F      0.230769
C      0.174825
G/F    0.090909
F/C    0.083916
Name: Pos, dtype: float64


In [16]:
# Same proportions, this time letting value_counts() do the division.
print(
    wnba['Pos'].value_counts(normalize=True)
)

G      0.419580
F      0.230769
C      0.174825
G/F    0.090909
F/C    0.083916
Name: Pos, dtype: float64


In [17]:
# Proportions scaled by 100 to read as percentages.
print(
    wnba['Pos']
    .value_counts(normalize=True)
    .mul(100)
)

G      41.958042
F      23.076923
C      17.482517
G/F     9.090909
F/C     8.391608
Name: Pos, dtype: float64


In [19]:
# Percentage of players at each age, ordered by age (youngest first) so the
# table can be sliced by age range afterwards.
percentages = (
    wnba['Age']
    .value_counts(normalize=True)
    .sort_index()
    .mul(100)
)
percentages

21     1.398601
22     6.993007
23    10.489510
24    11.188811
25    10.489510
26     8.391608
27     9.090909
28     9.790210
29     5.594406
30     6.293706
31     5.594406
32     5.594406
33     2.097902
34     3.496503
35     2.797203
36     0.699301
Name: Age, dtype: float64

In [20]:
# Proportion (not percentage) of players aged exactly 25: look the age up by
# label and undo the x100 scaling of the percentages table.
proportion_25 = percentages.loc[25] / 100
proportion_25

0.1048951048951049

In [22]:
# Percentage of players aged exactly 30, looked up by label.
percentage_30 = percentages.loc[30]
percentage_30

6.293706293706294

In [23]:
# Percentage of players aged 30 or older. Despite the variable name, age 30
# itself is part of the total.
is_30_or_older = percentages.index >= 30
percentage_over_30 = percentages[is_30_or_older].sum()
percentage_over_30

26.573426573426573

In [24]:
# Percentage of players aged 23 or younger. Despite the variable name, age 23
# itself is part of the total.
is_23_or_younger = percentages.index <= 23
percentage_below_23 = percentages[is_23_or_younger].sum()
percentage_below_23

18.88111888111888

## 5. Finding Percentiles with pandas

In [25]:
# Summary statistics for Age; the 25%, 50% and 75% rows are the quartiles.
print(wnba.loc[:, 'Age'].describe())

count    143.000000
mean      27.076923
std        3.679170
min       21.000000
25%       24.000000
50%       27.000000
75%       30.000000
max       36.000000
Name: Age, dtype: float64


In [26]:
# Keep only the positional statistics (min, quartiles, max) by slicing the
# summary from its 'min' row onward; count, mean and std come before it.
print(
    wnba['Age']
    .describe()
    .loc['min':]
)

min    21.0
25%    24.0
50%    27.0
75%    30.0
max    36.0
Name: Age, dtype: float64


In [27]:
# Ask describe() for a custom set of percentiles instead of the default
# quartiles, then keep the rows from 'min' onward (count, mean and std are
# dropped).
print(
    wnba['Age']
    .describe(percentiles=[.1, .15, .33, .5, .592, .85, .9])
    .loc['min':]
)

min      21.0
10%      23.0
15%      23.0
33%      25.0
50%      27.0
59.2%    28.0
85%      31.0
90%      32.0
max      36.0
Name: Age, dtype: float64


## 6. Grouped Frequency Distribution Tables

In [30]:
# Grouped frequency table for Weight: ten equal-width class intervals, listed
# from the lightest interval to the heaviest.
print(
    wnba['Weight']
    .value_counts(bins=10)
    .sort_index()
)

(54.941, 60.8]     5
(60.8, 66.6]      21
(66.6, 72.4]      10
(72.4, 78.2]      33
(78.2, 84.0]      31
(84.0, 89.8]      24
(89.8, 95.6]      10
(95.6, 101.4]      3
(101.4, 107.2]     2
(107.2, 113.0]     3
Name: Weight, dtype: int64


In [28]:
# Grouped frequency table for PTS in percentages: ten equal-width class
# intervals, listed from the highest-scoring interval down to the lowest.
grouped_freq_table = (
    wnba['PTS']
    .value_counts(bins=10, normalize=True)
    .sort_index(ascending=False)
    .mul(100)
)

grouped_freq_table

(525.8, 584.0]     3.496503
(467.6, 525.8]     2.797203
(409.4, 467.6]     5.594406
(351.2, 409.4]     6.993007
(293.0, 351.2]     5.594406
(234.8, 293.0]    11.888112
(176.6, 234.8]    13.986014
(118.4, 176.6]    11.888112
(60.2, 118.4]     16.783217
(1.417, 60.2]     20.979021
Name: PTS, dtype: float64

## 7. Readability for Grouped Frequency Tables

In [34]:
# Ten automatically generated class intervals: the boundaries are fractional
# and the rows are ordered by frequency, which makes the table hard to read.
wnba.loc[:, 'PTS'].value_counts(bins=10)

(1.417, 60.2]     30
(60.2, 118.4]     24
(176.6, 234.8]    20
(118.4, 176.6]    17
(234.8, 293.0]    17
(351.2, 409.4]    10
(293.0, 351.2]     8
(409.4, 467.6]     8
(525.8, 584.0]     5
(467.6, 525.8]     4
Name: PTS, dtype: int64

In [32]:
# Hand-picked class intervals: ten 60-point-wide intervals covering 0 to 600,
# which gives round boundaries instead of the fractional automatic ones.
intervals = pd.interval_range(
    start=0,
    end=600,
    freq=60,
)

# Count the players falling in each interval and list the intervals from the
# lowest to the highest.
gr_freq_table_10 = (
    wnba['PTS']
    .value_counts(bins=intervals)
    .sort_index()
)
gr_freq_table_10

(0, 60]       30
(60, 120]     25
(120, 180]    17
(180, 240]    22
(240, 300]    15
(300, 360]     7
(360, 420]    11
(420, 480]     7
(480, 540]     4
(540, 600]     5
Name: PTS, dtype: int64

## 8. Frequency Tables and Continuous Variables

In [38]:
# Frequency of the recorded height 175 cm in the Height distribution (16).
# Height is a continuous variable, so those 16 players are not all exactly
# 175 cm tall: each one is somewhere between 174.5 cm and 175.5 cm.
print(wnba['Height'].value_counts().loc[175])

16


In [36]:
# Positional statistics for Height (min, quartiles, max): slice the summary
# from its 'min' row onward, leaving out count, mean and std.
print(
    wnba['Height']
    .describe()
    .loc['min':]
)

min    165.0
25%    176.5
50%    185.0
75%    191.0
max    206.0
Name: Height, dtype: float64
