In [2]:
## Pandas data types. The main types in Pandas include:

# object
# int64
# float64
# bool 
# datetime64[ns]
# timedelta64[ns]
# category 

In [19]:
# Import dependencies 

import pandas as pd

# Load the temperature CSV into the notebook-wide DataFrame variable
# cp1252 is passed explicitly because reading the file without it raised a UnicodeDecodeError
# NOTE(review): the path is relative to the working directory the kernel was started in
DataFrame = pd.read_csv(
    'Desktop/climate/temp.csv',
    encoding='cp1252',
)

# Preview the first five rows
DataFrame.head(n=5)

Unnamed: 0,Area Code,Area,Months Code,Months,Element Code,Element,Unit,Y1961,Y1962,Y1963,...,Y2010,Y2011,Y2012,Y2013,Y2014,Y2015,Y2016,Y2017,Y2018,Y2019
0,2,Afghanistan,7001,January,7271,Temperature change,Celsius,0.777,0.062,2.744,...,3.601,1.179,-0.583,1.233,1.755,1.943,3.416,1.201,1.996,2.951
1,2,Afghanistan,7001,January,6078,Standard Deviation,Celsius,1.95,1.95,1.95,...,1.95,1.95,1.95,1.95,1.95,1.95,1.95,1.95,1.95,1.95
2,2,Afghanistan,7002,February,7271,Temperature change,Celsius,-1.743,2.465,3.919,...,1.212,0.321,-3.201,1.494,-3.187,2.699,2.251,-0.323,2.705,0.086
3,2,Afghanistan,7002,February,6078,Standard Deviation,Celsius,2.597,2.597,2.597,...,2.597,2.597,2.597,2.597,2.597,2.597,2.597,2.597,2.597,2.597
4,2,Afghanistan,7003,March,7271,Temperature change,Celsius,0.516,1.336,0.403,...,3.39,0.748,-0.527,2.246,-0.076,-0.497,2.296,0.834,4.418,0.234


In [21]:
## object

# The object dtype can hold any Python object; pandas typically uses it for strings and for columns that mix several types

In [44]:
## int64

# int64 is the 64-bit integer dtype: whole numbers (negative, zero or positive) with no fractional part, stored exactly

# In this dataset the code columns (Element Code, Months Code and Area Code) are int64

# Copy those three columns into their own frame, integerDataFrame
integerDataFrame = DataFrame.filter(
    items=['Element Code', 'Months Code', 'Area Code'],
    axis='columns',
)

# The dtypes attribute lists the dtype of every column, confirming that each one is int64
integerDataFrame.dtypes


Element Code    int64
Months Code     int64
Area Code       int64
dtype: object

In [48]:
## float64

# float64 is the dtype for numbers that have a decimal part, for example 1.5 or 99.1

# In this dataset the year columns are float64

# Copy three of the year columns into their own frame, floatDataFrame
floatDataFrame = DataFrame.filter(
    items=['Y1961', 'Y1981', 'Y2001'],
    axis='columns',
)

# The dtypes attribute confirms that every selected column is float64
floatDataFrame.dtypes

Y1961    float64
Y1981    float64
Y2001    float64
dtype: object

In [58]:
## bool

# The bool dtype holds True or False values

# Applying a logical test to a column produces one True/False result per row

# Test, row by row, whether the Y1961 value is greater than or equal to the Y1962 value
# (Series.ge is the method form of the >= operator) and store the answers in a new frame
booleanDataFrame = pd.DataFrame(
    {'Bool Data Type': DataFrame['Y1961'].ge(DataFrame['Y1962'])}
)

# The dtypes attribute confirms that the resulting column is bool
booleanDataFrame.dtypes

Bool Data Type    bool
dtype: object

In [141]:
## datetime64

# datetime64 holds dates and times; converting date text (which can come in several formats) to this type makes time series data easier to manipulate

# Import datetime dependency

from datetime import datetime

# Build the list of year labels from the column names
# Year columns are named "Y" followed by the year (e.g. "Y1961"), so the leading "Y" is dropped with [1:]
# startswith (guarded by isinstance) is used instead of col[0] so that empty or
# non-string column labels are skipped rather than raising an error
columns = [
    col[1:]
    for col in DataFrame
    if isinstance(col, str) and col.startswith("Y")
]

# Create DataFrame for the cleansed column names
columnsDataFrame = pd.DataFrame({'Column Name': columns})

# Convert Column Name to datetime type. The format string must match the text being parsed;
# '%Y' is a four-digit year, so each value becomes 1 January of that year
columnsDataFrame['Column Name'] = pd.to_datetime(columnsDataFrame['Column Name'], format='%Y')

# Print the dtypes and a preview on separate lines so the two outputs do not run together
print(columnsDataFrame.dtypes, columnsDataFrame.head(), sep='\n')

Column Name    datetime64[ns]
dtype: object   Column Name
0  1961-01-01
1  1962-01-01
2  1963-01-01
3  1964-01-01
4  1965-01-01


In [164]:
## timedelta64[ns]

# A timedelta holds the difference between two datetime values

# Take the first two rows of columnsDataFrame for the calculation
# iloc selects by integer position, and each selected row comes back as a Series
x, y = (columnsDataFrame.iloc[position] for position in range(2))

# Subtract the earlier row from the later one and keep the result in timeDelta
timeDelta = y - x

# Show both rows and their difference: 1961-01-01 (x) to 1962-01-01 (y) is 365 days
print(x, y, timeDelta)

Column Name   1961-01-01
Name: 0, dtype: datetime64[ns] Column Name   1962-01-01
Name: 1, dtype: datetime64[ns] Column Name   365 days
dtype: timedelta64[ns]


In [84]:
## category

# The category dtype is meant for columns that repeat values from a limited set of labels

# Area is an example: the same area name appears on many rows of the dataset

# Convert the Area column to the category dtype with astype
# NOTE: despite its name, categoryDataFrame is a Series (a single column), not a DataFrame
categoryDataFrame = DataFrame['Area'].astype('category')

# Check that the type is category by using the dtype attribute
categoryDataFrame.dtype

In [None]:
# dtypes Pandas function 

In [None]:
# Converting data types 

