# OVERVIEW 

### This dataset contains temperature readings from an enterprise building room, taken both inside and outside the building at random intervals. The recording speed was per second.

1.  Dataset details:
    * id : unique IDs for each reading
    * room_id/id : room id in which device was installed.
    * noted_date : date and time of reading
    * temp : temperature readings
    * out/in : whether reading was taken from device installed inside or outside of room
2.  Task Completed:
    * Variance of temp for inside - outside room temp?
    * How outside temp was related to inside temp?


In [None]:
import numpy as np 
import pandas as pd 
import seaborn as sns
import matplotlib.pyplot as plt
import os

# Lazily build the full path of every file under the Kaggle input directory,
# then print the paths one per line.
input_files = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for input_path in input_files:
    print(input_path)

# 1. Read data files and Provide some basic Insights

### Random 5 rows

In [None]:
# Load the IoT temperature CSV into `data`, the raw frame used by the cells below,
# and display 5 randomly sampled rows as the cell output.
data = pd.read_csv("../input/temperature-readings-iot-devices/IOT-temp.csv")
data.sample(5)

### First 5 rows

In [None]:
# Display the leading rows of the raw frame (head() returns 5 rows by default).
data.head()

### Last 5 rows

In [None]:
# Display the trailing rows of the raw frame (tail() returns 5 rows by default).
data.tail()

In [None]:
# Report the (rows, columns) tuple of the raw frame.
print("Shape of our data is : ",data.shape)

In [None]:
# Print the number of distinct values held by each column of the raw frame.
# Series.nunique is used instead of len(set(...)): a Python set treats every
# NaN as a separate element, which would inflate the count for columns with
# missing values. dropna=False still counts "missing" once as its own value.
print("Unique values in every column \n" + '-' * 25)
for column in data.columns:
    print("\t" + column + " = ", data[column].nunique(dropna=False))

In [None]:
# Print the column dtypes, non-null counts and memory usage of the raw frame.
data.info()

In [None]:
# Display summary statistics for the raw frame.
data.describe()

In [None]:
# Work on a copy without the two identifier columns; `data` itself is untouched.
identifier_columns = ['id', 'room_id/id']
df = data.drop(columns=identifier_columns)
df.head()

# 2. Data Analysis

### Check for Missing values

In [None]:
# Count missing values per column. Note this inspects the raw `data` frame,
# not the reduced `df` frame.
data.isnull().sum()

### Separate date and time

In [None]:
# Split every "noted_date" stamp on spaces once, then keep the first piece as
# the date and the second piece as the time.
stamp_parts = [stamp.split(' ') for stamp in df['noted_date']]
date = [parts[0] for parts in stamp_parts]
time = [parts[1] for parts in stamp_parts]
df['date'] = date
df['time'] = time

In [None]:
# The combined stamp is no longer needed once 'date' and 'time' exist.
df.drop(columns=['noted_date'], inplace=True)
df.head()

In [None]:
# Build 0/1 indicator columns for where each reading was taken.
# pd.get_dummies returns its columns in sorted label order (the "in" label
# before the "out" label), so assigning its output positionally to
# ['outside', 'inside'] swapped the two flags. Each flag is now derived from
# the label it is named after.
# NOTE(review): assumes the labels in 'out/in' are "In"/"Out" (any casing);
# confirm against the CSV.
location_label = df['out/in'].astype(str).str.strip().str.lower()
df['outside'] = (location_label == 'out').astype(int)
df['inside'] = (location_label == 'in').astype(int)
df.rename(columns={'out/in': 'location'}, inplace=True)

In [None]:
# Count rows flagged 1 and rows flagged 0 in the 'inside' indicator column.
inside_flags = df['inside']
inside_total = int((inside_flags == 1).sum())
outside_total = int((inside_flags == 0).sum())
print('Total Inside Observations  :', inside_total)
print('Total Outside Observations :', outside_total)

### Let's separate date further into days,months and year

In [None]:
# Replace the textual 'date' column with numeric 'year', 'month' and 'day'
# columns. Re-running the cell after 'date' has been dropped is a no-op that
# only prints a notice.
# Only the missing-column case is treated as "already performed"; a bare
# `except:` would also hide real failures (e.g. unparseable dates) behind the
# same message.
try:
    raw_dates = df['date']
except KeyError:
    print('Operations already performed')
else:
    # NOTE(review): to_datetime runs with default settings; if the raw stamps
    # are day-first, pass dayfirst=True - confirm against the CSV.
    parsed_dates = pd.to_datetime(raw_dates)
    df['year'] = parsed_dates.dt.year
    df['month'] = parsed_dates.dt.month
    df['day'] = parsed_dates.dt.day
    df.drop('date', axis=1, inplace=True)
df.head()

In [None]:
# Print the sorted distinct values of each calendar column.
for heading, column in (
    ("Days of observation   : ", 'day'),
    ("Months of observation : ", 'month'),
    ("Year of observation   : ", 'year'),
):
    print(heading, sorted(df[column].unique()))

In [None]:
# Summarise the 'temp' column: row count, extremes, mean, standard deviation
# and variance, one labelled line per statistic.
temp_column = df['temp']
print("Temperature -> \n" + "-" * 30)
for heading, value in (
    ("\tTotal Count    = ", temp_column.shape[0]),
    ("\tMinimum Value  = ", temp_column.min()),
    ("\tMaximum Value  = ", temp_column.max()),
    ("\tMean Value     = ", temp_column.mean()),
    ("\tStd dev Value  = ", temp_column.std()),
    ("\tVariance Value = ", temp_column.var()),
):
    print(heading, value)

### Reassemble whole dataframe and print the new detailed dataframe

In [None]:
# Rebind `df` to a frame with the columns in presentation order:
# calendar parts, time, reading, then the location label and its flags.
column_order = [
    'day', 'month', 'year', 'time',
    'temp', 'location', 'outside', 'inside',
]
df = df.loc[:, column_order]
df.head()

# 3. Data Visualization

In [None]:
# Box plot of all temperature readings.
# The variable is passed as x= explicitly: recent seaborn releases treat the
# first positional argument as `data`, not `x`.
sns.boxplot(x=df['temp'])
plt.show()

In [None]:
# Number of readings per value of the 'inside' flag.
# Passed as x= because recent seaborn releases treat the first positional
# argument as `data`, not `x`.
sns.countplot(x=df['inside'])

In [None]:
# Mean temperature per location label.
# x and y must be keywords: seaborn >= 0.12 rejects them as positional arguments.
sns.barplot(x=df['location'], y=df['temp'])
plt.show()

In [None]:
# Mean temperature per location label (this cell repeats the bar plot above).
# x and y must be keywords: seaborn >= 0.12 rejects them as positional arguments.
sns.barplot(x=df['location'], y=df['temp'])
plt.show()

In [None]:
# Temperature against month, coloured by the 'inside' flag.
# x and y must be keywords: seaborn >= 0.12 rejects them as positional arguments.
sns.scatterplot(x=df['month'], y=df['temp'], hue=df['inside'])

In [None]:
# Temperature against day of month, coloured by the 'inside' flag.
# x and y must be keywords: seaborn >= 0.12 rejects them as positional arguments.
sns.scatterplot(x=df['day'], y=df['temp'], hue=df['inside'])

In [None]:
# Correlation heatmap of the numeric and boolean columns.
# 'time' and 'location' hold strings; pandas >= 2.0 raises when corr() meets
# non-numeric columns instead of silently dropping them, so they are filtered
# out explicitly.
numeric_frame = df.select_dtypes(include=['number', 'bool'])
sns.heatmap(numeric_frame.corr())

In [None]:
# Pairwise relationship grid over the columns of `df`.
sns.pairplot(df)

# 4. TASKS

In [None]:
# Partition the 'inside' flag values: `x` collects every value equal to 1 and
# `y` collects every other value. Both hold the flag values themselves, not
# temperature readings.
arr = df['inside']
x = pd.Series([flag for flag in arr if flag == 1])
y = pd.Series([flag for flag in arr if not flag == 1])
type(arr)

### Variance of temp for inside - outside room temp ?
>       Outcome : The temperature outside has larger variance than inside temperature.

In [None]:
# Three violin plots side by side: inside-only temperatures, outside-only
# temperatures, and temperature grouped by location label.
# The first two panels select the temperature rows by the 'inside' flag.
# Pairing a constant series of flag values with the full 'temp' column never
# restricted the plot to inside or outside readings.
# x and y are keywords because seaborn >= 0.12 rejects them positionally.
fig, axes = plt.subplots(1, 3, figsize=(18, 5))
inside_mask = df['inside'] == 1
inside_temp = df.loc[inside_mask, 'temp']
outside_temp = df.loc[~inside_mask, 'temp']
sns.violinplot(y=inside_temp, ax=axes[0], color='b').set_title("Inside v/s Temp")
sns.violinplot(y=outside_temp, ax=axes[1], color='r').set_title("Outside v/s Temp")
sns.violinplot(x=df['location'], y=df['temp'], ax=axes[2]).set_title("Location v/s Temp")

### How outside temp was related to inside temp ?
>             Outcome:  Inside temp is largely free of variation, so it follows a flat/linear trend, while outside temp shows a bit of variation and some seasonality with trends.

In [None]:
# Temperature against day of month, one line per location label.
# x and y must be keywords: seaborn >= 0.12 rejects them as positional arguments.
sns.lineplot(x=df['day'], y=df['temp'], hue=df['location'])

## Do Not Forget To Upvote And Add your Comments If You Like The Kernel

# THANK YOU :D