In [1]:
import pandas as pd
import numpy as np

----
# Creating a DataFrame
### Creating using a 2D array

In [2]:
# Create array of random data
arr = np.random.randn(6, 4)

# Index
# This creates an array 6 elements all having the current date&time
dates = pd.date_range("today", periods=6)

# Column names
columns = np.array(["A", "B", "C", "D"])

In [3]:
df = pd.DataFrame(arr, index=dates, columns=columns)
df

Unnamed: 0,A,B,C,D
2023-05-28 10:09:34.891827,1.422774,-0.347108,0.764869,-0.631159
2023-05-29 10:09:34.891827,-0.674994,1.439592,-1.005354,-0.971082
2023-05-30 10:09:34.891827,1.298287,-0.919434,0.264295,-1.141962
2023-05-31 10:09:34.891827,-0.888588,-1.358004,-0.727055,0.634032
2023-06-01 10:09:34.891827,0.712415,2.784897,-0.058468,-0.547983
2023-06-02 10:09:34.891827,-0.12344,-0.341684,-0.231102,0.156798


### Creating using a Dictionary

In [4]:
data = {
    "animal": [
        "cat",
        "cat",
        "snake",
        "dog",
        "dog",
        "cat",
        "snake",
        "cat",
        "dog",
        "dog",
    ],
    "age": [2.5, 3, 0.5, np.nan, 5, 2, 4, np.nan, 7, 4],
    "visits": [1, 3, 2, 3, 2, 3, 1, 1, 2, 1],
    "priority": ["yes", "yes", "no", "yes", "no", "no", "no", "yes", "no", "no"],
}

df2 = pd.DataFrame(data)
df2

Unnamed: 0,animal,age,visits,priority
0,cat,2.5,1,yes
1,cat,3.0,3,yes
2,snake,0.5,2,no
3,dog,,3,yes
4,dog,5.0,2,no
5,cat,2.0,3,no
6,snake,4.0,1,no
7,cat,,1,yes
8,dog,7.0,2,no
9,dog,4.0,1,no


----
### View datatypes of the dataframe

In [5]:
df2.dtypes

animal       object
age         float64
visits        int64
priority     object
dtype: object

### Head and tail

In [6]:
# Default value is 5
df2.head(2)
df2.tail(2)

Unnamed: 0,animal,age,visits,priority
8,dog,7.0,2,no
9,dog,4.0,1,no


### View index, columns

In [7]:
df2.index
df2.columns

Index(['animal', 'age', 'visits', 'priority'], dtype='object')

### Describe the data

In [8]:
df2.describe()

Unnamed: 0,age,visits
count,8.0,10.0
mean,3.5,1.9
std,1.982062,0.875595
min,0.5,1.0
25%,2.375,1.0
50%,3.5,2.0
75%,4.25,2.75
max,7.0,3.0


In [9]:
df2.mean()

  df2.mean()


age       3.5
visits    1.9
dtype: float64

In [10]:
df2.sum()

animal      catcatsnakedogdogcatsnakecatdogdog
age                                       28.0
visits                                      19
priority              yesyesnoyesnononoyesnono
dtype: object

----
# Turn df back to numpy array

In [11]:
df2.values

array([['cat', 2.5, 1, 'yes'],
       ['cat', 3.0, 3, 'yes'],
       ['snake', 0.5, 2, 'no'],
       ['dog', nan, 3, 'yes'],
       ['dog', 5.0, 2, 'no'],
       ['cat', 2.0, 3, 'no'],
       ['snake', 4.0, 1, 'no'],
       ['cat', nan, 1, 'yes'],
       ['dog', 7.0, 2, 'no'],
       ['dog', 4.0, 1, 'no']], dtype=object)

----
# Copying a dataframe

In [12]:
df3 = df2.copy()
df3

Unnamed: 0,animal,age,visits,priority
0,cat,2.5,1,yes
1,cat,3.0,3,yes
2,snake,0.5,2,no
3,dog,,3,yes
4,dog,5.0,2,no
5,cat,2.0,3,no
6,snake,4.0,1,no
7,cat,,1,yes
8,dog,7.0,2,no
9,dog,4.0,1,no
