# Import software libraries and load the datasets

In [1]:
import sys
from pathlib import Path

import numpy as np
import pandas as pd

# Summarize software libraries used.
print('Libraries used in this project:')
print('- Python {}'.format(sys.version))
print('- NumPy {}'.format(np.__version__))
print('- pandas {}'.format(pd.__version__))

# Build the data folder path with pathlib instead of a hard-coded '..\\data\\'
# string: a backslash is only a path separator on Windows, so the previous
# literals could not be resolved when the notebook ran on Linux or macOS.
DATA_DIR = Path('..') / 'data'

# Load the datasets (comma-separated numeric files) into NumPy arrays.
sales_arr = np.loadtxt(DATA_DIR / 'initial_sales.csv', delimiter=',')
ratings_arr = np.loadtxt(DATA_DIR / 'initial_ratings.csv', delimiter=',')
print('\nLoaded datasets.')

Libraries used in this project:
- Python 3.9.7 (default, Sep 16 2021, 16:59:28) [MSC v.1916 64 bit (AMD64)]
- NumPy 1.22.3
- pandas 1.4.3

Loaded datasets.


# Create a `Series` from a 1-D NumPy array

In [2]:
# Show the raw ratings array, followed by a blank line and its shape.
ratings_shape_msg = '\nShape of ratings_arr: {}'.format(ratings_arr.shape)
print(ratings_arr, ratings_shape_msg, sep='\n')

[ 9.1  9.6  7.4  8.4  5.3  4.1  5.8  8.   7.2  5.9  4.5  6.8  7.1  8.2
  5.7  4.5  4.6  6.9  8.6  4.4  4.8  5.1  4.4  9.9  6.   8.5  6.7  7.7
  9.6  7.4  4.8  4.5  5.1  5.1  7.5  6.8  7.   4.7  7.6  7.7  7.9  6.3
  5.6  7.6  9.5  8.4  4.1  8.1  7.9  9.5  8.5  6.5  6.1  6.5  8.2  5.8
  6.6  9.3 10.   7.  10.   8.6  7.6  5.8  6.7  9.9  6.4  4.3  9.6  5.9
  4.   8.7  9.4  5.4  5.7  6.6  6.   5.5  6.4  6.6  8.3  6.6  4.   9.9
  7.3  5.7  7.1  8.2  5.1  8.6  6.6  7.2  5.1  4.1  9.3  7.4]

Shape of ratings_arr: (96,)


In [3]:
# Wrap the 1-D ratings array in a pandas Series; no index is passed, so pandas
# supplies its default integer index.
ratings_s = pd.Series(data=ratings_arr)
ratings_s

0     9.1
1     9.6
2     7.4
3     8.4
4     5.3
     ... 
91    7.2
92    5.1
93    4.1
94    9.3
95    7.4
Length: 96, dtype: float64

In [4]:
# Report the Series' shape and its index object, one per line.
series_shape_line = 'Series shape:   {}'.format(ratings_s.shape)
series_index_line = 'Series indices: {}'.format(ratings_s.index)
print(series_shape_line, series_index_line, sep='\n')

Series shape:   (96,)
Series indices: RangeIndex(start=0, stop=96, step=1)


# Create a `DataFrame` from a 2-D NumPy array

In [5]:
# Preview the first ten rows (all columns) of the sales array, then its shape.
first_ten_rows = sales_arr[:10, :]
sales_shape_msg = '\nShape of sales_arr: {}'.format(sales_arr.shape)
print(first_ten_rows, sales_shape_msg, sep='\n')

[[ 74.69   7.    26.14 548.97 522.83 500.24]
 [ 15.28   5.     3.82  80.22  76.4   73.21]
 [ 46.33   7.    16.22 340.53 324.31 321.12]
 [ 58.22   8.    23.29 489.05 465.76 430.98]
 [ 86.31   7.    30.21 634.38 604.17 578.9 ]
 [ 85.39   7.    29.89 627.62 597.73 585.04]
 [ 68.84   6.    20.65 433.69 413.04 395.42]
 [ 73.56  10.    36.78 772.38 735.6  702.08]
 [ 36.26   2.     3.63  76.15  72.52  69.8 ]
 [ 54.84   3.     8.23 172.75 164.52 156.08]]

Shape of sales_arr: (96, 6)


In [6]:
# Column labels to attach to the sales array, in the array's column order.
cols = [
    'UnitPrice',
    'Quantity',
    'Tax',
    'TotalPrice',
    'Revenue',
    'COGS',
]

# Wrap the 2-D sales array in a labelled DataFrame and display it.
sales_df = pd.DataFrame(data=sales_arr, columns=cols)
sales_df

Unnamed: 0,UnitPrice,Quantity,Tax,TotalPrice,Revenue,COGS
0,74.69,7.0,26.14,548.97,522.83,500.24
1,15.28,5.0,3.82,80.22,76.40,73.21
2,46.33,7.0,16.22,340.53,324.31,321.12
3,58.22,8.0,23.29,489.05,465.76,430.98
4,86.31,7.0,30.21,634.38,604.17,578.90
...,...,...,...,...,...,...
91,97.16,1.0,4.86,102.02,97.16,92.68
92,87.87,10.0,43.94,922.64,878.70,848.40
93,12.45,6.0,3.74,78.44,74.70,72.06
94,52.75,3.0,7.91,166.16,158.25,152.99


In [7]:
# Report the DataFrame's shape, total element count, and index object.
df_report_lines = (
    'DataFrame shape:   {}'.format(sales_df.shape),
    'DataFrame size:    {}'.format(sales_df.size),
    'DataFrame indices: {}'.format(sales_df.index),
)
print('\n'.join(df_report_lines))

DataFrame shape:   (96, 6)
DataFrame size:    576
DataFrame indices: RangeIndex(start=0, stop=96, step=1)


In [8]:
# Inspect the dtype pandas assigned to each column of sales_df.
sales_df.dtypes

UnitPrice     float64
Quantity      float64
Tax           float64
TotalPrice    float64
Revenue       float64
COGS          float64
dtype: object

# Get a quick look at `DataFrame` records

In [9]:
# Display the first ten records of the sales DataFrame.
sales_df.head(n=10)

Unnamed: 0,UnitPrice,Quantity,Tax,TotalPrice,Revenue,COGS
0,74.69,7.0,26.14,548.97,522.83,500.24
1,15.28,5.0,3.82,80.22,76.4,73.21
2,46.33,7.0,16.22,340.53,324.31,321.12
3,58.22,8.0,23.29,489.05,465.76,430.98
4,86.31,7.0,30.21,634.38,604.17,578.9
5,85.39,7.0,29.89,627.62,597.73,585.04
6,68.84,6.0,20.65,433.69,413.04,395.42
7,73.56,10.0,36.78,772.38,735.6,702.08
8,36.26,2.0,3.63,76.15,72.52,69.8
9,54.84,3.0,8.23,172.75,164.52,156.08


In [10]:
# Display the last ten records of the sales DataFrame.
sales_df.tail(n=10)

Unnamed: 0,UnitPrice,Quantity,Tax,TotalPrice,Revenue,COGS
86,47.38,4.0,9.48,199.0,189.52,182.38
87,44.86,10.0,22.43,471.03,448.6,427.73
88,21.98,7.0,7.69,161.55,153.86,149.21
89,64.36,9.0,28.96,608.2,579.24,561.34
90,89.75,1.0,4.49,94.24,89.75,84.76
91,97.16,1.0,4.86,102.02,97.16,92.68
92,87.87,10.0,43.94,922.64,878.7,848.4
93,12.45,6.0,3.74,78.44,74.7,72.06
94,52.75,3.0,7.91,166.16,158.25,152.99
95,82.7,6.0,24.81,521.01,496.2,477.84
