# Our database with companies that left Russia in 2022

### База Тиграна с Action, Tone + База старая с INT  + Spark

In [1]:
%run load.ipynb
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from kmodes.kprototypes import KPrototypes

import configlib as lib

In [2]:
df_int = lib.get_v2()
df_actions_tone = lib.get_v3()
spark = lib.get_spark_interfax()

In [3]:
df_int.shape

(4007, 5)

In [4]:
print(" \nCount total NaN at each column in a DataFrame : \n\n", df_int.isnull().sum())

 
Count total NaN at each column in a DataFrame : 

 i              0
Action        27
Industry     420
Country      180
ИНН         3095
dtype: int64


In [5]:
df_actions_tone.shape

(3344, 6)

In [6]:
df_int = df_actions_tone.drop('Unnamed: 0', axis = 1)
df_actions_tone = df_actions_tone.rename(columns = {'tone': 'Tone'})
df_actions_tone = df_actions_tone.sort_values(by='Company')
df_actions_tone = df_actions_tone.reset_index(drop=True)
df_actions_tone.head(1)

Unnamed: 0.1,Unnamed: 0,Company,Action,Industry,Country,Tone
0,2010,1840 & Company,"['suspend', 'aesthetics', 'operations', ',', '...",Health Care,United States,2


In [7]:
# Define a dictionary to map the numbers to their corresponding labels
label_mapping = {0: 'leaved', 1: 'continue', 2: 'paused'}

# Replace the numbers in the specified column with the labels
df_actions_tone['Tone'] = df_actions_tone['Tone'].map(label_mapping)

In [8]:
df_actions_tone.head(10)

Unnamed: 0.1,Unnamed: 0,Company,Action,Industry,Country,Tone
0,2010,1840 & Company,"['suspend', 'aesthetics', 'operations', ',', '...",Health Care,United States,paused
1,2209,1Password,"['suspend', 'new', 'investments/development']",Consumer Discretionary,France,paused
2,1142,1xBet,"['ending', 'new', 'investments', 'where', 'has...",Financials,Netherlands,leaved
3,1924,3HC Semiconductors,"['some', 'clients', 'no', 'longer', 'supplied'...",Materials,France,leaved
4,2323,3M,"['suspend', 'supply', 'of', 'parts', 'and', 'd...",Industrials,Netherlands,continue
5,2197,3M Company,"['suspend', 'new', 'investments', 'and', 'new'...",Health Care,Switzerland,paused
6,48,A&D,"['cease', 'trading', 'operations', 'with', 'ru...",Industrials,Germany,leaved
7,1929,A-SAFE,"['still', 'operating', 'in', 'russia', ';', 's...",Information Technology,Australia,leaved
8,2357,AAK,"['suspended', 'some', 'shipments']",Information Technology,Japan,leaved
9,1082,AB InBev,"['continues', 'flows', 'to', 'russian', 'and',...",Industrials,France,leaved


In [9]:
df_int = df_int.rename(columns={'i': 'Company'})
df_int = df_int.rename(columns={'ИНН': 'INT'})
df_int = df_int.sort_values(by='Company')
df_int = df_int.reset_index(drop=True)
df_int.head(1)

Unnamed: 0,Company,Action,Industry,Country,tone
0,1840 & Company,"['suspend', 'aesthetics', 'operations', ',', '...",Health Care,United States,2


In [10]:
df1 = df_actions_tone.copy()
df2 = df_int.copy()

# Merge the two datasets based on the "Company" column
data = pd.merge(df1, df2, on='Company')
data.head(1)

Unnamed: 0.1,Unnamed: 0,Company,Action_x,Industry_x,Country_x,Tone,Action_y,Industry_y,Country_y,tone
0,2010,1840 & Company,"['suspend', 'aesthetics', 'operations', ',', '...",Health Care,United States,paused,"['suspend', 'aesthetics', 'operations', ',', '...",Health Care,United States,2


In [11]:
# Select the desired columns
desired_columns = ['Company', 'Action_x', 'Industry_x', 'Country_x', 'Tone', 'INT']
data = data[desired_columns]
data.head(1)

KeyError: "['INT'] not in index"

In [None]:
# Rename the columns
data.columns = ['Company', 'Action', 'Industry', 'Country', 'Tone', 'INT']

In [None]:
data

# Spark database

In [None]:
spark.head()

In [None]:
spark.shape

<span style="font-size: 17px;"> Delete all unnecessary columns

In [None]:
columns_to_drop = ['№', 'Наименование', 'Регистрационный номер', 'Краткое наименование', 'Дата регистрации']
spark = spark.drop(columns_to_drop, axis=1)

In [None]:
print(spark.columns)

<span style="font-size: 17px;"> Rename columns 'Наименование на английском' and 'Код налогоплательщика' to 'Company' and 'INT'

In [None]:
spark = spark.rename(columns={"Наименование на английском": "Company"})
spark = spark.rename(columns={"Код налогоплательщика": "INT"})
# spark['Company'] = spark['Company'].str.lower()

In [None]:
spark.head(1)

# Merging two databases into one

In [None]:
df_merged = data.merge(spark, on='INT')

In [None]:
df_merged.head()

In [None]:
df_merged = df_merged.rename(columns={"Company_y": "Company"})

df_merged = df_merged.drop({'Company_x', 'INT', 
                          '2017, Среднесписочная численность работников', 
                          '2018, Среднесписочная численность работников', 
                          '2019, Среднесписочная численность работников',
                          '2020, Среднесписочная численность работников', 
                          '2021, Среднесписочная численность работников',
                          'Мои списки'}, axis=1)

In [None]:
df_merged = df_merged[['Company', 'Industry', 'Country', 'Action', 'Tone', 'Возраст компании, лет',
       'Вид деятельности/отрасль', 'Организационно-правовая форма',
       'Форма собственности', 'Размер компании', '2017, Выручка, RUB',
       '2018, Выручка, RUB', '2019, Выручка, RUB', '2020, Выручка, RUB',
       '2021, Выручка, RUB', '2017, Прибыль (убыток) до налогообложения , RUB',
       '2018, Прибыль (убыток) до налогообложения , RUB',
       '2019, Прибыль (убыток) до налогообложения , RUB',
       '2020, Прибыль (убыток) до налогообложения , RUB',
       '2021, Прибыль (убыток) до налогообложения , RUB']]

In [None]:
df_merged.head()

In [None]:
database = df_merged.copy()

<span style="font-size: 17px;"> The column 'Возраст компании, лет' is of string datatype. To make it float datatype, we replace , with . 

In [None]:
if database['Возраст компании, лет'].dtype == object:
        # Replace commas with periods in the column
        database['Возраст компании, лет'] = database['Возраст компании, лет'].str.replace(',', '.')
database['Возраст компании, лет'] = database['Возраст компании, лет'].astype(float)

<span style="font-size: 17px;"> Some columns has big number with spaces, therefore they are of a string type.  To make them float datatype, we delete all unnecassary spaces

In [None]:
database['2017, Выручка, RUB'] = database['2017, Выручка, RUB'].str.replace(' ', '').astype(float)
database['2018, Выручка, RUB'] = database['2018, Выручка, RUB'].str.replace(' ', '').astype(float)
database['2019, Выручка, RUB'] = database['2019, Выручка, RUB'].str.replace(' ', '').astype(float)
database['2020, Выручка, RUB'] = database['2020, Выручка, RUB'].str.replace(' ', '').astype(float)
database['2021, Выручка, RUB'] = database['2021, Выручка, RUB'].str.replace(' ', '').astype(float)
database['2017, Прибыль (убыток) до налогообложения , RUB'] = database['2017, Прибыль (убыток) до налогообложения , RUB'].str.replace(' ', '').astype(float)
database['2018, Прибыль (убыток) до налогообложения , RUB'] = database['2018, Прибыль (убыток) до налогообложения , RUB'].str.replace(' ', '').astype(float)
database['2019, Прибыль (убыток) до налогообложения , RUB'] = database['2019, Прибыль (убыток) до налогообложения , RUB'].str.replace(' ', '').astype(float)
database['2020, Прибыль (убыток) до налогообложения , RUB'] = database['2020, Прибыль (убыток) до налогообложения , RUB'].str.replace(' ', '').astype(float)
database['2021, Прибыль (убыток) до налогообложения , RUB'] = database['2021, Прибыль (убыток) до налогообложения , RUB'].str.replace(' ', '').astype(float)

In [None]:
database.head(5)

In [None]:
database.shape

# Handling missing values

# Сделать прибыль и выручку из средних за все года

<span style="font-size: 17px;"> Count total NaN at each column in a database

In [None]:
print(" \nCount total NaN at each column in a DataFrame : \n\n", database.isnull().sum())

<span style="font-size: 17px;"> Visualize missing values in dataset

<span style="font-size: 17px;"> Visualize the number of missing values as a bar chart

In [None]:
import missingno as msno
msno.bar(database)

<span style="font-size: 17px;"> Visualize the correlation between the number of missing values in different columns as a heatmap.

<span style="font-size: 17px;"> As we see below, there is a diagonal with values 0.9, which means that profit and insome highly correlates in terms of null values (if profit value is present, then income value is likely to be present too)

In [None]:
msno.heatmap(database)

# Here we have to fill NaN values of proft with mean !!!!!

<span style="font-size: 17px;"> Delete all rows where more than 6 NaN values exist

In [None]:
database.dropna(thresh=database.shape[1] - 6, inplace=True)

In [None]:
print(" \nCount total NaN at each column in a DataFrame : \n\n", database.isnull().sum())

In [None]:
database.shape

<span style="font-size: 17px;"> Delete all rows where Nan values exist

In [None]:
database['Mean Revenue'] = database[['2017, Выручка, RUB', '2018, Выручка, RUB', '2019, Выручка, RUB', '2020, Выручка, RUB', '2021, Выручка, RUB']].mean(axis=1)

In [None]:
database.dropna(subset=['Mean Revenue'], inplace=True)
database.drop(['2017, Выручка, RUB', '2018, Выручка, RUB', '2019, Выручка, RUB', '2020, Выручка, RUB', '2021, Выручка, RUB'], axis=1, inplace=True)


In [None]:
database.head()

In [None]:
database.shape

In [None]:
print(" \nCount total NaN at each column in a DataFrame : \n\n", database.isnull().sum())

In [None]:
database['Mean Profit'] = database[['2017, Прибыль (убыток) до налогообложения , RUB', '2018, Прибыль (убыток) до налогообложения , RUB', '2019, Прибыль (убыток) до налогообложения , RUB', '2020, Прибыль (убыток) до налогообложения , RUB', '2021, Прибыль (убыток) до налогообложения , RUB']].mean(axis=1)

In [None]:
database.dropna(subset=['Mean Revenue'], inplace=True)
database.drop(['2017, Прибыль (убыток) до налогообложения , RUB', '2018, Прибыль (убыток) до налогообложения , RUB', '2019, Прибыль (убыток) до налогообложения , RUB', '2020, Прибыль (убыток) до налогообложения , RUB', '2021, Прибыль (убыток) до налогообложения , RUB'], axis=1, inplace=True)

In [None]:
database.head()

In [None]:
database.shape

In [None]:
print(" \nCount total NaN at each column in a DataFrame : \n\n", database.isnull().sum())

In [None]:
database.dropna(inplace=True)

In [None]:
print(" \nCount total NaN at each column in a DataFrame : \n\n", database.isnull().sum())

In [None]:
database.shape

In [None]:
database.describe()

In [None]:
database.corr()

<span style="font-size: 17px;">  Now our dataset has no NaN values and we can start clustarisation

# Clusterisation (k-prototypes)

Если мы оставляем колонку 'Mean Profit', то получается 1 кластер

Если мы удаляем колонку 'Mean Revenue' and 'Mean Profit', то получается 2 кластера: 
0 : 326 и 1 : 218

In [None]:
database2 = database.copy()
database2 = database2.drop({'Company', 'Action','Mean Revenue', 'Mean Profit'} , axis = 1)
# database2 = database2.drop({'Company', 'Action','Вид деятельности/отрасль', 'Организационно-правовая форма','Форма собственности'} , axis = 1)

In [None]:
database2.head(1)

In [None]:
mark_array=database2.values

<span style="font-size: 17px;"> Mark columns that has float datatypes

In [None]:
mark_array[:, 3] = mark_array[:, 3].astype(float)
# mark_array[:, 5] = mark_array[:, 5].astype(float)
# mark_array[:, 6] = mark_array[:, 6].astype(float)
# mark_array[:, 8] = mark_array[:, 8].astype(float)
# mark_array[:, 9] = mark_array[:, 9].astype(float)

In [None]:
mark_array

<span style="font-size: 17px;"> Get the position of categorical columns

In [None]:
catColumnsPos = [database2.columns.get_loc(col) for col in list(database2.select_dtypes('object').columns)]

print('Categorical columns           : {}'.format(list(database2.select_dtypes('object').columns)))
print('Categorical columns position  : {}'.format(catColumnsPos))

In [None]:
# Convert dataframe to matrix
dfMatrix = database2.to_numpy()

In [None]:
dfMatrix 

In [None]:
print(" \nCount total NaN at each column in a DataFrame : \n\n", database2.isnull().sum())

<span style="font-size: 17px;"> Elbow method to detect number of clusters K

In [None]:
from kmodes.kprototypes import KPrototypes

cost = []
for cluster in range(1, 10):
    try:
        kprototype = KPrototypes(n_jobs = -1, n_clusters = cluster, init = 'Huang', random_state = 0)
        kprototype.fit_predict(database2, categorical = catColumnsPos)
        cost.append(kprototype.cost_)
        print('Cluster initiation: {}'.format(cluster))
    except:
        break

plt.plot(cost)
plt.xlabel('K')
plt.ylabel('cost')
plt.show

In [None]:
cost

In [None]:
kproto = KPrototypes(n_clusters=2, verbose=2,max_iter=30)
clusters = kproto.fit_predict(mark_array, categorical=[0, 1, 2, 4, 5, 6, 7])

In [None]:
print(kproto.cluster_centroids_)

In [None]:
cluster_dict=[]
for c in clusters:
    cluster_dict.append(c)

In [None]:
cluster_dict

In [None]:
database2['cluster']=cluster_dict

In [None]:
for i in range(0,5):
    print(i, ": ", database2[database2['cluster']== i].value_counts().sum())

In [None]:
database2

In [None]:
database2[database2['cluster']== 0].head(10)

In [None]:
database2[database2['cluster']== 1].head(10)

In [None]:
database2[database2['cluster']== 2].head(10)

<span style="font-size: 17px;"> Visualisation:

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

database2.columns = ["Action", "Tone", "Возраст компании, лет", "Вид деятельности/отрасль", "Организационно-правовая форма", "Форма собственности", "cluster"]
sns.pairplot(database2, hue="cluster")

In [None]:
sns.boxplot(x="cluster", y="Возраст компании, лет", data=database2)


In [None]:
sns.boxplot(x="cluster", y="Организационно-правовая форма", data=database2)

In [None]:
sns.boxplot(x="cluster", y="Форма собственности", data=database2)

## Clusterisation of dataset with income and profit

In [None]:
df_filtered = database.copy()
df_filtered = df_filtered.drop({'Company', 'Action', 'Вид деятельности/отрасль'}, axis=1)

In [None]:
df_filtered.head(1)

In [None]:
df_cat = df_filtered[['Industry', 'Country', 'Tone', 'Организационно-правовая форма', 'Форма собственности', 'Размер компании']]

for i in df_cat.columns:
    print(df_cat[i].unique())

<span style="font-size: 17px;"> Check that there are no NaN values in the dataset</span>

In [None]:
print(" \nCount total NaN at each column in a DataFrame : \n\n", df_filtered.isnull().sum())

In [None]:
mark_array=df_filtered.values

<span style="font-size: 17px;"> Mark columns that has float datatypes

In [None]:
mark_array[:, 2] = mark_array[:, 2].astype(float)
mark_array[:, 7] = mark_array[:, 7].astype(float)
mark_array[:, 8] = mark_array[:, 8].astype(float)
mark_array[:, 9] = mark_array[:, 9].astype(float)
mark_array[:, 10] = mark_array[:, 10].astype(float)
mark_array[:,11] = mark_array[:, 11].astype(float)

<span style="font-size: 17px;"> Get the position of categorical columns

In [None]:
catColumnsPos = [df_filtered.columns.get_loc(col) for col in list(df_filtered.select_dtypes('object').columns)]

print('Categorical columns           : {}'.format(list(df_filtered.select_dtypes('object').columns)))
print('Categorical columns position  : {}'.format(catColumnsPos))

In [None]:
# Convert dataframe to matrix
dfMatrix = df_filtered.to_numpy()

<span style="font-size: 17px;"> Elbow method to detect number of clusters K

In [None]:
from kmodes.kprototypes import KPrototypes

cost = []
for cluster in range(1, 10):
    try:
        kprototype = KPrototypes(n_jobs = -1, n_clusters = cluster, init = 'Huang', random_state = 0)
        kprototype.fit_predict(dfMatrix, categorical = catColumnsPos)
        cost.append(kprototype.cost_)
        print('Cluster initiation: {}'.format(cluster))
    except:
        break

plt.plot(cost)
plt.xlabel('K')
plt.ylabel('cost')
plt.show

<span style="font-size: 17px;">  The elbow is at point 3 => numbers of clusters = 3

In [None]:
mark_array

In [None]:
kproto = KPrototypes(n_clusters=3, verbose=2,max_iter=30)
clusters = kproto.fit_predict(mark_array, categorical=[0, 1, 4, 5, 6])

In [None]:
print(kproto.cluster_centroids_)

In [None]:
cluster_dict=[]
for c in clusters:
    cluster_dict.append(c)

In [None]:
cluster_dict

In [None]:
df_filtered['Cluster']=cluster_dict

In [None]:
df_filtered

In [None]:
df_filtered[df_filtered['Cluster']== 0].head(10)

In [None]:
df_filtered[df_filtered['Cluster']== 1].head(10)

In [None]:
df_filtered[df_filtered['Cluster']== 2].head(10)

<span style="font-size: 17px;"> Let's visualize our clusters

In [None]:
#the volume of each cluster
df_filtered['Cluster'].value_counts().plot(kind='bar')

plt.xticks(fontsize=10, rotation=0, ha='right')

# Graphs

In [None]:
import matplotlib.pyplot as plt

In [None]:
database3 = database.copy()
database3

### Countries

<span style="font-size: 17px;"> Value_counts() returns the count of unique values in the 'Country' column, and head(10) selects the first 10 values, which represent the top 10 countries with the highest counts.</span>

In [None]:
df_countries = database3['Country'].value_counts().head(10)

In [None]:
df_countries = df_countries.to_frame().reset_index()

In [None]:
df_countries

In [None]:
# Increase the figure size and adjust the spacing
plt.figure(figsize=(12, 8))
plt.subplots_adjust(left=0.1, right=0.9, top=0.9, bottom=0.1)

# Plot the bar chart
df_countries.plot(kind='bar', x='index', y='Country', color='pink')

# Set the labels and title
plt.xlabel('Country', fontsize=12, labelpad=10)
plt.ylabel('Number of Companies', fontsize=12, labelpad=10)
plt.title('Number of Companies per Country', fontsize=14, fontweight='bold', pad=20)

# Adjust the font size and rotation of the x-axis labels
plt.xticks(fontsize=10, rotation=45, ha='right')

# Adjust the font size of the y-axis labels
plt.yticks(fontsize=10)

# Remove the top and right spines (borders)
plt.gca().spines['top'].set_visible(False)
plt.gca().spines['right'].set_visible(False)

# Add grid lines to the plot
plt.grid(axis='y', linestyle='--', alpha=0.5)

plt.show()

### Industries:

In [None]:
df_industries = database3['Industry'].value_counts()
df_industries = df_industries.to_frame().reset_index()

In [None]:
import matplotlib.pyplot as plt

# Increase the figure size and adjust the spacing
plt.figure(figsize=(16, 8))
plt.subplots_adjust(left=0.1, right=0.9, top=0.9, bottom=0.1)

# Plot the bar chart
df_industries.plot(kind='bar', x='index', y='Industry', color='#a7e8a9')

# Set the labels and title
plt.xlabel('Industry', fontsize=12, labelpad=10)
plt.ylabel('Number of Companies', fontsize=12, labelpad=10)
plt.title('Number of Companies per Industry', fontsize=14, fontweight='bold', pad=20)

# Adjust the font size and rotation of the x-axis labels
plt.xticks(fontsize=10, rotation=45, ha='right')

# Adjust the font size of the y-axis labels
plt.yticks(fontsize=10)

# Remove the top and right spines
plt.gca().spines['top'].set_visible(False)
plt.gca().spines['right'].set_visible(False)

# Add grid lines to the plot
plt.grid(axis='y', linestyle='--', alpha=0.5)

plt.show()

### Size

In [None]:
df_size = database3['Размер компании'].value_counts()
df_size = df_size.to_frame().reset_index()

In [None]:
import matplotlib.pyplot as plt

# Increase the figure size and adjust the spacing
plt.figure(figsize=(14, 8))
plt.subplots_adjust(left=0.1, right=0.9, top=0.9, bottom=0.1)

# Plot the bar chart
df_size.plot(kind='bar', x='index', y='Размер компании', color='#b6dffc')

# Set the labels and title
plt.xlabel('Size', fontsize=12, labelpad=10)
plt.ylabel('Number of Companies', fontsize=12, labelpad=10)
plt.title('Number of Companies per Size', fontsize=14, fontweight='bold', pad=20)

# Adjust the font size and rotation of the x-axis labels
plt.xticks(fontsize=10, rotation=45, ha='right')

# Adjust the font size of the y-axis labels
plt.yticks(fontsize=10)

# Remove the top and right spines
plt.gca().spines['top'].set_visible(False)
plt.gca().spines['right'].set_visible(False)

# Add grid lines to the plot
plt.grid(axis='y', linestyle='--', alpha=0.5)

# Show the plot
plt.show()

In [None]:
database3.head()

### Age

In [None]:
database3['Возраст компании, лет'].plot(kind='hist', bins=30)

plt.xlabel('Age of Company')
plt.ylabel('Count')
plt.title('Distribution of Company Age')

plt.show()

### Organisational form

In [None]:
df_form = database3['Организационно-правовая форма'].value_counts().head(5)
df_form = df_form.to_frame().reset_index()
df_form

In [None]:
# Increase the figure size and adjust the spacing
plt.figure(figsize=(10, 5))
plt.subplots_adjust(left=0.1, right=0.9, top=0.9, bottom=0.1)

# Plot the bar chart
df_form.plot(kind='bar', x='index', y='Организационно-правовая форма', color='#ccccff')

# Set the labels and title
plt.xlabel('Type of form', fontsize=12, labelpad=10)
plt.ylabel('Number of Companies', fontsize=12, labelpad=10)
plt.title('Number of Companies per Organisational form', fontsize=14, fontweight='bold', pad=20)

# Adjust the font size and rotation of the x-axis labels
plt.xticks(fontsize=10, rotation=45, ha='right')

# Adjust the font size of the y-axis labels
plt.yticks(fontsize=10)

# Remove the top and right spines
plt.gca().spines['top'].set_visible(False)
plt.gca().spines['right'].set_visible(False)

# Add grid lines to the plot
plt.grid(axis='y', linestyle='--', alpha=0.5)

# Show the plot
plt.show()

### Form of property

In [None]:
df_form2 = database3['Форма собственности'].value_counts().head(5)
df_form2 = df_form2.to_frame().reset_index()
df_form2

In [None]:
# Increase the figure size and adjust the spacing
plt.figure(figsize=(10, 5))
plt.subplots_adjust(left=0.1, right=0.9, top=0.9, bottom=0.1)

# Plot the bar chart
df_form2.plot(kind='bar', x='index', y='Форма собственности', color='#ffcc99')

# Set the labels and title
plt.xlabel('Type of form', fontsize=12, labelpad=10)
plt.ylabel('Number of Companies', fontsize=12, labelpad=10)
plt.title('Number of Companies per Form of property', fontsize=14, fontweight='bold', pad=20)

# Adjust the font size and rotation of the x-axis labels
plt.xticks(fontsize=10, rotation=45, ha='right')

# Adjust the font size of the y-axis labels
plt.yticks(fontsize=10)

# Remove the top and right spines
plt.gca().spines['top'].set_visible(False)
plt.gca().spines['right'].set_visible(False)

# Add grid lines to the plot
plt.grid(axis='y', linestyle='--', alpha=0.5)

# Show the plot
plt.show()

### Tone
* 0 - completely stoped operations in Russia
* 1 - continue operations in Russia
* 2 - paused operations in Russia

In [None]:
df_tone = database3['Tone'].value_counts().head(5)
df_tone

In [None]:
# Increase the figure size and adjust the spacing
plt.figure(figsize=(10, 5))
plt.subplots_adjust(left=0.1, right=0.9, top=0.9, bottom=0.1)

# Plot the bar chart
df_tone.plot(kind='bar', x='index', y='Tone', color='#ffcc99')

# Set the labels and title
plt.xlabel('Tone', fontsize=12, labelpad=10)
plt.ylabel('Number of Companies', fontsize=12, labelpad=10)
plt.title('Number of Companies per Tone', fontsize=14, fontweight='bold', pad=20)

# Adjust the font size and rotation of the x-axis labels
plt.xticks(fontsize=10, rotation=0, ha='right')

# Adjust the font size of the y-axis labels
plt.yticks(fontsize=10)

# Remove the top and right spines
plt.gca().spines['top'].set_visible(False)
plt.gca().spines['right'].set_visible(False)

# Add grid lines to the plot
plt.grid(axis='y', linestyle='--', alpha=0.5)

# Show the plot
plt.show()