In [None]:
#!pip install --upgrade plotly
#!pip install --upgrade pip

In [1]:
#Importing Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
#import plotly.express as px
import plotly.io as pio
from plotly.subplots import make_subplots
from plotly import express as px
import plotly.graph_objects as go
pio.templates.default = 'plotly_dark'
color_theme = px.colors.qualitative.Antique
pd.options.mode.chained_assignment = None

### Let’s load our dataset with the help of pandas’s read_csv function, and let’s check our top 5 rows.

In [2]:
#Load the dataset
personal_df = pd.read_csv('russia_losses_personnel.csv')
equipment_df = pd.read_csv('russia_losses_equipment.csv')
equipment_df.head()

Unnamed: 0,date,day,aircraft,helicopter,tank,APC,field artillery,MRL,military auto,fuel tank,drone,naval ship,anti-aircraft warfare,special equipment,mobile SRBM system,greatest losses direction,vehicles and fuel tanks,cruise missiles
0,2022-02-25,2,10,7,80,516,49,4,100.0,60.0,0,2,0,,,,,
1,2022-02-26,3,27,26,146,706,49,4,130.0,60.0,2,2,0,,,,,
2,2022-02-27,4,27,26,150,706,50,4,130.0,60.0,2,2,0,,,,,
3,2022-02-28,5,29,29,150,816,74,21,291.0,60.0,3,2,5,,,,,
4,2022-03-01,6,29,29,198,846,77,24,305.0,60.0,3,2,7,,,,,


### Now, we will perform some basic EDA like checking the columns in our dataset.

In [3]:
equipment_df.columns

Index(['date', 'day', 'aircraft', 'helicopter', 'tank', 'APC',
       'field artillery', 'MRL', 'military auto', 'fuel tank', 'drone',
       'naval ship', 'anti-aircraft warfare', 'special equipment',
       'mobile SRBM system', 'greatest losses direction',
       'vehicles and fuel tanks', 'cruise missiles'],
      dtype='object')

In [4]:
personal_df.columns

Index(['date', 'day', 'personnel', 'personnel*', 'POW'], dtype='object')

### We will look the data types of our columns with the info() function and description of our dataset.

In [5]:
equipment_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 272 entries, 0 to 271
Data columns (total 18 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   date                       272 non-null    object 
 1   day                        272 non-null    int64  
 2   aircraft                   272 non-null    int64  
 3   helicopter                 272 non-null    int64  
 4   tank                       272 non-null    int64  
 5   APC                        272 non-null    int64  
 6   field artillery            272 non-null    int64  
 7   MRL                        272 non-null    int64  
 8   military auto              65 non-null     float64
 9   fuel tank                  65 non-null     float64
 10  drone                      272 non-null    int64  
 11  naval ship                 272 non-null    int64  
 12  anti-aircraft warfare      272 non-null    int64  
 13  special equipment          253 non-null    float64

In [6]:
equipment_df.describe()

Unnamed: 0,day,aircraft,helicopter,tank,APC,field artillery,MRL,military auto,fuel tank,drone,naval ship,anti-aircraft warfare,special equipment,mobile SRBM system,vehicles and fuel tanks,cruise missiles
count,272.0,272.0,272.0,272.0,272.0,272.0,272.0,65.0,65.0,272.0,272.0,272.0,253.0,36.0,207.0,207.0
mean,137.5,206.702206,182.283088,1606.393382,3617.481618,892.051471,235.963235,1047.507692,69.323077,666.654412,12.352941,116.886029,81.70751,3.944444,3117.386473,211.917874
std,78.663842,62.058444,51.641512,751.069842,1379.297639,509.932198,102.101238,466.16206,7.545917,444.677169,4.268704,54.265218,46.121749,0.333333,733.406261,101.538448
min,2.0,10.0,7.0,80.0,516.0,49.0,4.0,100.0,60.0,0.0,2.0,0.0,10.0,2.0,1796.0,84.0
25%,69.75,194.0,155.0,1073.25,2599.25,487.0,162.75,600.0,60.0,300.0,10.0,80.75,43.0,4.0,2540.0,137.0
50%,137.5,217.0,188.0,1643.0,3825.5,836.0,247.0,1178.0,73.0,676.0,15.0,108.5,70.0,4.0,3021.0,185.0
75%,205.25,250.25,216.0,2199.75,4692.75,1303.0,312.0,1437.0,76.0,908.75,15.0,168.0,125.0,4.0,3803.5,246.0
max,273.0,278.0,261.0,2897.0,5832.0,1887.0,395.0,1701.0,76.0,1537.0,16.0,209.0,161.0,4.0,4396.0,480.0


In [7]:
personal_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 272 entries, 0 to 271
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   date        272 non-null    object 
 1   day         272 non-null    int64  
 2   personnel   272 non-null    int64  
 3   personnel*  272 non-null    object 
 4   POW         62 non-null     float64
dtypes: float64(1), int64(2), object(2)
memory usage: 10.8+ KB


### We will be looking into our dataset if we have null values and we will drop the POW column from dataset as it is of no use in our analysis.

In [8]:
personal_df.describe

<bound method NDFrame.describe of            date  day  personnel personnel*    POW
0    2022-02-25    2       2800      about    0.0
1    2022-02-26    3       4300      about    0.0
2    2022-02-27    4       4500      about    0.0
3    2022-02-28    5       5300      about    0.0
4    2022-03-01    6       5710      about  200.0
..          ...  ...        ...        ...    ...
267  2022-11-19  269      83880      about    NaN
268  2022-11-20  270      84210      about    NaN
269  2022-11-21  271      84600      about    NaN
270  2022-11-22  272      85000      about    NaN
271  2022-11-23  273      85410      about    NaN

[272 rows x 5 columns]>

In [9]:
personal_df.isnull().sum()

date            0
day             0
personnel       0
personnel*      0
POW           210
dtype: int64

In [10]:
personal_df.drop(columns = ['POW'], inplace = True)

In [11]:
personal_df.isnull().sum()

date          0
day           0
personnel     0
personnel*    0
dtype: int64

### We will merge our ‘fuel tank’ and ‘military auto’ columns into ‘vehicle and fuel tanks’ and ‘mobile SRBM system’ column into ‘cruise missiles’ column, and we will drop the unnecessary columns from our dataset.

In [12]:
equipment_df.loc[:64, 'vechcles and fuel tanks'] = equipment_df.loc[:64, 'fuel tank' ] + equipment_df.loc[:64, 'military auto']
equipment_df.loc[:64, 'cruise missiles'] = equipment_df.loc[:64, 'mobile SRBM system']
equipment_df.drop(columns = ['mobile SRBM system', 'military auto', 'fuel tank'], inplace=True)

equipment_df.head()

Unnamed: 0,date,day,aircraft,helicopter,tank,APC,field artillery,MRL,drone,naval ship,anti-aircraft warfare,special equipment,greatest losses direction,vehicles and fuel tanks,cruise missiles,vechcles and fuel tanks
0,2022-02-25,2,10,7,80,516,49,4,0,2,0,,,,,160.0
1,2022-02-26,3,27,26,146,706,49,4,2,2,0,,,,,190.0
2,2022-02-27,4,27,26,150,706,50,4,2,2,0,,,,,190.0
3,2022-02-28,5,29,29,150,816,74,21,3,2,5,,,,,351.0
4,2022-03-01,6,29,29,198,846,77,24,3,2,7,,,,,365.0


In [13]:
def style():
    fig.update_xaxes(showgrid=False)
    fig.update_yaxes(showgrid=False)
    fig.update_layout(font_color = '#9EDEC6')
    fig.show()

### Now, we will be visualizing the Top 10 Greatest Loss Directions, Troop Losses and Per Day Troop Losses one by one with the help of bar plot, line plot and histogram.

In [16]:
g_l_d = equipment_df['greatest losses direction'].value_counts().reset_index().rename(columns={'index' : 'Direction', 'greatest losses direction' : 'Count'})
fig = px.bar(g_l_d[:10],
            x='Direction', y='Count', color='Direction',
            text='Count', title='Top 10 Greatest Loss Directions',
            color_discrete_sequence=color_theme)
style()

In [17]:
fig = px.line(personal_df, x='date', y='personnel', title='Troop Losses', markers=True)
fig.update_traces(marker=dict(size=3))
fig.update_layout(
xaxis_title='Date',
yaxis_title='Count')
style()

In [18]:
personal_df['Count'] = personal_df['personnel'] - personal_df['personnel'].shift(1)
fig = px.histogram(personal_df, x='day', y='Count', title='Per Day Troop Losses', nbins=max(personal_df['day']))
style()

### Now we will be shifting our greatest losses direction column in the beginning of our dataframe and also looking at the equipment losses.

In [19]:
#moving great Losses direction column to the start of dataframe
columns = list(equipment_df.columns)
columns.remove('greatest losses direction')
columns = ['greatest losses direction'] + columns
equipment_df = equipment_df[columns]

In [20]:
equipment_df.head()

Unnamed: 0,greatest losses direction,date,day,aircraft,helicopter,tank,APC,field artillery,MRL,drone,naval ship,anti-aircraft warfare,special equipment,vehicles and fuel tanks,cruise missiles,vechcles and fuel tanks
0,,2022-02-25,2,10,7,80,516,49,4,0,2,0,,,,160.0
1,,2022-02-26,3,27,26,146,706,49,4,2,2,0,,,,190.0
2,,2022-02-27,4,27,26,150,706,50,4,2,2,0,,,,190.0
3,,2022-02-28,5,29,29,150,816,74,21,3,2,5,,,,351.0
4,,2022-03-01,6,29,29,198,846,77,24,3,2,7,,,,365.0


In [21]:
fig=go.Figure()
titles = []

for i in list(equipment_df.columns[3:]):
    title = i
    if i[0].isupper() == False:
        title = i.title()
    titles += [title]
fig = make_subplots(rows=4, cols=3, subplot_titles=titles)

for i in range(4):
    for j in range(3):
        fig.add_trace(go.Scatter(x=equipment_df['date'], name=titles[j+i*3],y=equipment_df.iloc[:, 3+(j+i*3)]), 
                     row=i+1, col=j+1)
        
fig.update_layout(title='Equipment Losses', showlegend=False, height = 850, width = 750)
style()

In [22]:
#animation_df created for animation of eequipment Losses
max_day = equipment_df['day'].max()
min_day = equipment_df['day'].min()
temp_arr = []

for i in list(equipment_df.columns[3:]):
    temp_arr += [i]*(max_day+1-min_day)
    
temp_dictionary = {'Equipment' : temp_arr}
temp_arr = []

for j in range(len(list(equipment_df.columns[3:]))):
    for i in range(min_day, max_day+1):
        temp_arr += [i]
        
temp_dictionary['Day'] = temp_arr
temp_arr = []
for j in list(equipment_df.columns[3:]):
    for i in equipment_df[j]:
        temp_arr +=[i]
        
temp_dictionary['Count'] = temp_arr

animation_df = pd.DataFrame(temp_dictionary)

In [23]:
fig = px.bar(animation_df, x = 'Equipment', title = 'Equipment Losses',
            y = 'Count', color='Equipment', animation_frame = 'Day',
            range_y=[0, max(animation_df['Count'])], color_discrete_sequence = color_theme)
fig.update_layout(showlegend=False, height = 500, width = 700)
style()

## Conclusion

In this article we have analysed Russia-Ukraine war dataset using python. I hope you liked it, if you have any questions let me know in the comment section below. 