In [1]:
# To familiarize myself with pandas, I create and manipulate different pandas data structures
# (DataFrame, Series) and convert between them and NumPy arrays.
# For more information see: 
# 1. Intro to Data Structures <https://pandas.pydata.org/pandas-docs/stable/dsintro.html> 
# 2. Pandas Tutorial: DataFrames in Python <https://www.datacamp.com/community/tutorials/pandas-tutorial-dataframe-python> 
# 

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [3]:
# ---------------------------------- Part 1 ------------------------------------
# Build a 5 x 5 pandas DataFrame from random integers.
# A pandas DataFrame is a 2-dimensional labeled data structure whose columns may hold different types.
# np.random.randint excludes `high`, so every value lies in 0..8.
pandas_dataframe = pd.DataFrame(data=np.random.randint(0, 9, size=(5, 5)))

In [4]:
# Display the pandas DataFrame (rows and columns carry the default labels 0..4)
pandas_dataframe

Unnamed: 0,0,1,2,3,4
0,5,1,5,3,8
1,0,3,8,5,2
2,5,5,3,5,6
3,7,1,7,0,6
4,0,2,2,7,6


In [5]:
# Reshape the DataFrame's underlying values into a single row of 25 values.
# NOTE: despite the variable name, the result is a 2-D NumPy array of shape (1, 25),
# not a pandas Series (a Series is a one-dimensional labeled array capable of holding any data type).
pandas_series_from_dataframe = pandas_dataframe.values.reshape((1, 25))

In [6]:
# Display the reshaped values (a 2-D NumPy array, not a pandas Series)
pandas_series_from_dataframe

array([[5, 1, 5, 3, 8, 0, 3, 8, 5, 2, 5, 5, 3, 5, 6, 7, 1, 7, 0, 6, 0, 2,
        2, 7, 6]])

In [7]:
# Display dimensions (rows, columns) of the reshaped NumPy array
pandas_series_from_dataframe.shape

(1, 25)

In [8]:
# Caveat: the object above is a 2-D NumPy array of shape (1, 25) -- neither a pandas Series nor a list!
# Flatten it into a one-dimensional NumPy array of 25 values
# (despite the variable name, the result is an ndarray, not a Python list)
pandas_list_from_dataframe = pandas_series_from_dataframe.flatten()

In [9]:
# Display the shape of the flattened array; (25,) means a single dimension of length 25
pandas_list_from_dataframe.shape

(25,)

In [10]:
# Display the flattened one-dimensional NumPy array
pandas_list_from_dataframe

array([5, 1, 5, 3, 8, 0, 3, 8, 5, 2, 5, 5, 3, 5, 6, 7, 1, 7, 0, 6, 0, 2,
       2, 7, 6])

In [11]:
# Build an actual pandas Series from the values of the flattened array
pandas_series_from_list = pd.Series(data=pandas_list_from_dataframe)

In [12]:
# Compute descriptive statistics (count, mean, std, min, quartiles, max) for the pandas Series
pandas_series_from_list.describe()

count    25.000000
mean      4.080000
std       2.564501
min       0.000000
25%       2.000000
50%       5.000000
75%       6.000000
max       8.000000
dtype: float64

In [13]:
# ---------------------------------- Part 2 ------------------------------------
# Draw 100 random integers and wrap them in a pandas Series.
# np.random.randint excludes `high`, so every value lies in 0..8.
pandas_series = pd.Series(data=np.random.randint(0, 9, size=100))

In [14]:
# Display the first 10 values of the pandas Series (index labels 0..9)
pandas_series.head(10)

0    2
1    1
2    5
3    3
4    6
5    2
6    3
7    4
8    4
9    2
dtype: int64

In [15]:
# Compute descriptive statistics (count, mean, std, min, quartiles, max) for the pandas Series
pandas_series.describe()

count    100.000000
mean       4.160000
std        2.481121
min        0.000000
25%        2.000000
50%        4.000000
75%        6.000000
max        8.000000
dtype: float64

In [16]:
# Take the Series' underlying NumPy values and arrange the 100 entries as a 10 x 10 array
pandas_array_from_series = pandas_series.values.reshape((10, 10))

In [17]:
# Display the 10 x 10 NumPy array
pandas_array_from_series

array([[2, 1, 5, 3, 6, 2, 3, 4, 4, 2],
       [6, 0, 4, 3, 2, 7, 1, 6, 7, 7],
       [8, 6, 7, 8, 2, 5, 7, 6, 3, 7],
       [6, 5, 8, 1, 7, 2, 2, 3, 8, 7],
       [4, 3, 1, 6, 6, 8, 5, 1, 7, 1],
       [3, 4, 8, 5, 3, 4, 5, 4, 4, 0],
       [6, 0, 1, 5, 7, 4, 2, 6, 4, 7],
       [6, 3, 1, 1, 3, 8, 2, 8, 4, 5],
       [5, 0, 1, 4, 1, 8, 3, 5, 3, 7],
       [8, 5, 4, 0, 0, 3, 1, 0, 8, 2]])

In [18]:
# Wrap the 10 x 10 array in a pandas DataFrame
pandas_dataframe_from_series = pd.DataFrame(data=pandas_array_from_series)

In [19]:
# Display the pandas DataFrame built from the array (rows and columns are labeled 0..9)
pandas_dataframe_from_series

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,2,1,5,3,6,2,3,4,4,2
1,6,0,4,3,2,7,1,6,7,7
2,8,6,7,8,2,5,7,6,3,7
3,6,5,8,1,7,2,2,3,8,7
4,4,3,1,6,6,8,5,1,7,1
5,3,4,8,5,3,4,5,4,4,0
6,6,0,1,5,7,4,2,6,4,7
7,6,3,1,1,3,8,2,8,4,5
8,5,0,1,4,1,8,3,5,3,7
9,8,5,4,0,0,3,1,0,8,2


In [20]:
# ---------------------------------- Part 3 ------------------------------------
# Generate a 10 x 10 pandas DataFrame holding 100 random floats scaled by 10.
# np.random.rand samples from [0, 1), so the resulting values are >= 0 and < 10.
pandas_DataFrame_float = pd.DataFrame(np.random.rand(10, 10) * 10)

In [21]:
# Display the pandas DataFrame of floats
pandas_DataFrame_float

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,6.521522,1.672104,4.977712,7.629412,8.469149,7.580516,6.238069,0.206929,5.462915,3.463845
1,3.823402,9.009066,4.947897,0.43269,8.99487,5.130191,2.862963,6.960726,3.101182,7.002643
2,3.543317,8.638948,4.68068,7.553769,0.817155,6.619615,6.846458,5.993192,7.390173,5.179258
3,8.466005,4.167505,5.526958,9.732557,9.305591,2.530418,0.224979,0.2108,6.806977,7.490674
4,8.318185,3.763058,7.42912,9.352155,8.836385,5.677047,9.046548,6.326723,6.248764,4.802752
5,5.585041,9.461534,4.458343,9.086059,3.826323,9.035998,1.6542,3.554387,1.589825,2.881231
6,0.542719,0.408354,6.451495,6.445048,1.167491,8.889926,2.556376,2.34881,5.830714,2.783376
7,1.771505,7.912873,0.892047,8.775187,9.944785,2.110739,0.042008,4.563602,7.381188,0.028218
8,5.672572,8.034244,5.695396,3.661229,0.612384,4.286824,4.915061,0.815071,7.804089,4.14697
9,1.078037,5.750155,2.64489,3.699237,3.372082,7.310789,3.799593,7.535212,7.311196,0.65432


In [22]:
# First attempt: try to convert all values to integers with pd.to_numeric(..., downcast='integer').
# NOTE: this does not round or truncate -- the displayed result still contains the original floats.
# Per the pandas.to_numeric docs, `downcast` only switches to a smaller dtype when the values
# can be stored in it; fractional values are therefore left as floats.
pandas_DataFrame_integer = pandas_DataFrame_float.apply(pd.to_numeric, downcast='integer')
# see https://pandas.pydata.org/pandas-docs/stable/generated/pandas.to_numeric.html#pandas.to_numeric

In [23]:
# Display the result of the first attempt (the values are still floats)
pandas_DataFrame_integer

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,6.521522,1.672104,4.977712,7.629412,8.469149,7.580516,6.238069,0.206929,5.462915,3.463845
1,3.823402,9.009066,4.947897,0.43269,8.99487,5.130191,2.862963,6.960726,3.101182,7.002643
2,3.543317,8.638948,4.68068,7.553769,0.817155,6.619615,6.846458,5.993192,7.390173,5.179258
3,8.466005,4.167505,5.526958,9.732557,9.305591,2.530418,0.224979,0.2108,6.806977,7.490674
4,8.318185,3.763058,7.42912,9.352155,8.836385,5.677047,9.046548,6.326723,6.248764,4.802752
5,5.585041,9.461534,4.458343,9.086059,3.826323,9.035998,1.6542,3.554387,1.589825,2.881231
6,0.542719,0.408354,6.451495,6.445048,1.167491,8.889926,2.556376,2.34881,5.830714,2.783376
7,1.771505,7.912873,0.892047,8.775187,9.944785,2.110739,0.042008,4.563602,7.381188,0.028218
8,5.672572,8.034244,5.695396,3.661229,0.612384,4.286824,4.915061,0.815071,7.804089,4.14697
9,1.078037,5.750155,2.64489,3.699237,3.372082,7.310789,3.799593,7.535212,7.311196,0.65432


In [24]:
# Second attempt: try to convert the floats of the first column (label 0) to integers.
# `[[0]]` selects a one-column DataFrame (column 0), not a row.
pandas_DataFrame_integer[[0]] = pandas_DataFrame_float[[0]].apply(pd.to_numeric, downcast='integer')

In [25]:
# Display column 0 after the second attempt (the values are still floats)
pandas_DataFrame_integer[[0]]

Unnamed: 0,0
0,6.521522
1,3.823402
2,3.543317
3,8.466005
4,8.318185
5,5.585041
6,0.542719
7,1.771505
8,5.672572
9,1.078037


In [26]:
# Define a helper that rounds a float to the nearest integer and returns it as an int.
# A named `def` is used instead of binding a lambda to a name (PEP 8 recommendation).
# For background on anonymous functions see https://www.programiz.com/python-programming/anonymous-function
# see https://stackoverflow.com/questions/31818050/round-number-to-nearest-integer
def round_to_integer(x):
    """Round ``x`` to the nearest integer and return the result as an ``int``.

    Relies on the built-in ``round``, which rounds exact halves to the nearest
    even integer (``round(0.5) == 0``, ``round(1.5) == 2``).
    """
    return int(round(x))
# equivalent anonymous form:
# round_to_integer = lambda x: int(round(x))

In [27]:
# Applying the newly defined function round_to_integer to the whole DataFrame would be: 
# pandas_DataFrame_float.applymap(round_to_integer)
# https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.applymap.html

In [28]:
# Apply the newly defined function round_to_integer to the first column (0) only.
# NOTE: the result is written back into pandas_DataFrame_float itself, so the original
# floats of column 0 are overwritten in place.
# see https://pandas.pydata.org/pandas-docs/version/0.17.0/generated/pandas.DataFrame.iloc.html
pandas_DataFrame_float.iloc[:,0] = pandas_DataFrame_float.iloc[:,0].apply(round_to_integer)

In [29]:
# Display the pandas DataFrame with the modified first column (0); the other columns are unchanged
pandas_DataFrame_float

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,7,1.672104,4.977712,7.629412,8.469149,7.580516,6.238069,0.206929,5.462915,3.463845
1,4,9.009066,4.947897,0.43269,8.99487,5.130191,2.862963,6.960726,3.101182,7.002643
2,4,8.638948,4.68068,7.553769,0.817155,6.619615,6.846458,5.993192,7.390173,5.179258
3,8,4.167505,5.526958,9.732557,9.305591,2.530418,0.224979,0.2108,6.806977,7.490674
4,8,3.763058,7.42912,9.352155,8.836385,5.677047,9.046548,6.326723,6.248764,4.802752
5,6,9.461534,4.458343,9.086059,3.826323,9.035998,1.6542,3.554387,1.589825,2.881231
6,1,0.408354,6.451495,6.445048,1.167491,8.889926,2.556376,2.34881,5.830714,2.783376
7,2,7.912873,0.892047,8.775187,9.944785,2.110739,0.042008,4.563602,7.381188,0.028218
8,6,8.034244,5.695396,3.661229,0.612384,4.286824,4.915061,0.815071,7.804089,4.14697
9,1,5.750155,2.64489,3.699237,3.372082,7.310789,3.799593,7.535212,7.311196,0.65432


In [30]:
# That's all for today, folks!