# In [2]:
# Data Preparation and Analysis

## Import Libraries

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load data
# Every workbook is read with pd.read_excel and no extra arguments. The paths
# are listed in the same order as the names they are unpacked into, so the
# n-th path always ends up in the n-th variable.
(
    buildings_area_data,
    contracts_data,
    contract_building_relation_data,
    assets_data,
    bills_data,
    service_codes_data,
) = [
    pd.read_excel(workbook_path)
    for workbook_path in (
        '../data/Площади зданий.xlsx',
        '../data/Договоры.xlsx',
        '../data/Связь договор - здания.xlsx',
        '../data/Основные средства.xlsx',
        '../data/Счета на оплату.xlsx',
        '../data/Коды услуг.xlsx',
    )
]

# Display first few rows of each dataset
# Each entry pairs the heading to print with the DataFrame whose head() is
# shown right after it. Headings after the first start with "\n" so that a
# blank line separates consecutive previews.
for heading, frame in (
    ("Buildings Area Data", buildings_area_data),
    ("\nContracts Data", contracts_data),
    ("\nContract Building Relation Data", contract_building_relation_data),
    ("\nAssets Data", assets_data),
    ("\nBills Data", bills_data),
    ("\nService Codes Data", service_codes_data),
):
    print(heading)
    print(frame.head())

## Data Cleaning

# Check for missing values
# Count the null cells in every column of the bills table and print the counts.
print("Missing values in Bills Data:")
missing_before = bills_data.isnull().sum()
print(missing_before)

# Drop rows with missing values for simplicity
# dropna() with default arguments removes every row that contains at least one
# null; inplace=True mutates the existing DataFrame instead of returning a copy.
bills_data.dropna(inplace=True)

# Verify that there are no more missing values
# Recount the nulls on the same object after the in-place drop.
print("Missing values after cleaning:")
missing_after = bills_data.isnull().sum()
print(missing_after)

## Exploratory Data Analysis

# Visualizations and exploratory analysis
# Draw seaborn's pair plot for the bills table and display the figure.
sns.pairplot(data=bills_data)
plt.show()

# Correlation matrix
# Numeric data only: keep the float and int columns before correlating.
numeric_bills_data = bills_data.select_dtypes(
    include=[float, int],
)
corr = numeric_bills_data.corr()

# Render the correlation matrix as a heatmap with the coefficient written in
# every cell, then display the figure.
sns.heatmap(
    data=corr,
    annot=True,
    cmap='coolwarm',
)
plt.show()

## Save Cleaned Data

# Save cleaned data for further processing
# Write the bills table to a new workbook; index=False keeps the DataFrame
# index out of the sheet.
bills_data.to_excel(
    '../data/cleaned_bills.xlsx',
    index=False,
)
# Report where the workbook was written.
print(
    "Cleaned data saved to '../data/cleaned_bills.xlsx'"
)

