Political support for the National Tuberculosis Programme (NTP) in Azerbaijan is suboptimal. Models of care, including treatment, early diagnosis and treatment of latent tuberculosis (TB) infection, should be improved in line with international standards. The laboratory network also needs strengthening. 

There is insufficient support for directly observed therapy (DOT) for drug-susceptible TB at primary health care level. Azerbaijan is among the 27 high multidrug-resistant (MDR) TB burden countries in the world, with the third highest MDR-TB rate worldwide.
https://www.euro.who.int/__data/assets/pdf_file/0009/168597/Azerbaijan-country-work-summary_200612.pdf

Azerbaijan remains a high MDR-TB burden country. There is a need to implement countrywide control and innovative measures to accelerate early diagnosis of drug resistance in individual patients, improve treatment adherence and strengthen routine surveillance of drug resistance. https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4547507/

![](https://i.pinimg.com/originals/9b/d1/38/9bd1386937f99ad22f70d2184a7edd37.jpg)pinterest.com

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt 
import seaborn as sns
%matplotlib inline
import plotly.express as px
import plotly.graph_objects as go
import plotly.offline as py
import plotly.express as px

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of each file.
for folder, _subfolders, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        full_path = os.path.join(folder, file_name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
from colorama import Fore, Style

nRowsRead = 1000 # specify 'None' if want to read whole file
# Only the first nRowsRead rows of the BCG strain-per-country CSV are loaded/previewed.
df = pd.read_csv('../input/hackathon/task_2-BCG_strain_per_country-1Nov2020.csv', delimiter=',', nrows = nRowsRead)
# Store the source file name as an ad-hoc attribute on the DataFrame object.
df.dataframeName = 'task_2-BCG_strain_per_country-1Nov2020.csv'
nRow, nCol = df.shape
print(f'There are {nRow} rows and {nCol} columns')
# Print the shape again with a magenta label; RESET_ALL restores the default style.
print(Fore.MAGENTA + 'Data shape: ',Style.RESET_ALL,df.shape)
# Last expression of the cell: shows the first rows in the notebook output.
df.head()

In [None]:
# Keep only the rows whose country_name is 'Azerbaijan' and renumber them from 0.
is_azerbaijan = df['country_name'] == 'Azerbaijan'
aze = df[is_azerbaijan].reset_index(drop=True)
aze.head()

In [None]:
#word cloud
from wordcloud import WordCloud, ImageColorGenerator
# Build the cloud text from the country_name column of the Azerbaijan subset.
text = " ".join(str(each) for each in aze.country_name)
# Create and generate a word cloud image:
wordcloud = WordCloud(max_words=200, colormap='Set3', background_color="black").generate(text)
# Open exactly one figure. The earlier version opened a (10, 6) figure that
# stayed empty and, after drawing, re-activated figure 1 with a figsize that
# is ignored for an already existing figure, so blank figures were shown.
plt.figure(figsize=(15, 10))
# Display the generated image:
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()

In [None]:
#Codes by Pooja Jain https://www.kaggle.com/jainpooja/av-guided-hackathon-predict-youtube-likes/notebook

# Text columns to visualise, one word cloud per column.
text_cols = ['BCG Atlas: BCG Strain', 'BCG Atlas: Timing of 1st BCG?', 'mandatory_bcg_strain_2015-2020']

from wordcloud import WordCloud, STOPWORDS

wc = WordCloud(stopwords=set(list(STOPWORDS) + ['|']), random_state=42)
fig, axes = plt.subplots(2, 2, figsize=(20, 12))
# Flatten the 2x2 grid so the axes can be paired with the columns in order.
axes = list(axes.ravel())

for ax, c in zip(axes, text_cols):
    # Join the actual cell values. str(df[c]) would feed the truncated Series
    # repr (row numbers, "Name: ...", "dtype: object") to the word cloud.
    column_text = " ".join(df[c].dropna().astype(str))
    ax.set_title(c.upper(), fontsize=24)
    ax.axis('off')
    if not column_text.strip():
        # WordCloud.generate needs at least one word; leave the panel empty.
        continue
    ax.imshow(wc.generate(column_text))

# Remove every grid cell that has no column assigned to it.
for unused_ax in axes[len(text_cols):]:
    fig.delaxes(unused_ax)

In [None]:
# Count the missing (NaN) entries in every column of df.
df.isnull().sum()

# Drop columns with NaN.

In [None]:
# The columns 'Unnamed: 19' through 'Unnamed: 25' are removed from df.
cols_to_drop = [f'Unnamed: {number}' for number in range(19, 26)]
df = df.drop(columns=cols_to_drop)
df.columns

# Handling Missing Values.

In [None]:
# Collect the object-dtype columns that contain at least one missing value.
categorical_nan = []
for column_name in df.columns:
    column = df[column_name]
    if column.isna().sum() > 0 and column.dtypes == 'O':
        categorical_nan.append(column_name)
print(categorical_nan)

In [None]:
# Fill the gaps of every listed categorical column with the string 'None',
# modifying df in place.
df.fillna(value=dict.fromkeys(categorical_nan, 'None'), inplace=True)

In [None]:
# Re-count missing values in the columns listed in categorical_nan.
df[categorical_nan].isna().sum()

In [None]:
#Code from Gabriel Preda
#plt.style.use('dark_background')
def plot_count(feature, title, df, size=1):
    """Draw a count plot of the most frequent values of one column.

    Bars are ordered by decreasing frequency and limited to the 20 most
    frequent values. Each bar is annotated with its share of *all* rows of
    ``df`` (not only of the rows shown), formatted as a percentage.

    Parameters
    ----------
    feature : column label in ``df`` whose values are counted.
    title : text appended to "Number and percentage of " in the plot title.
    df : DataFrame holding the column.
    size : width multiplier; the figure is ``4 * size`` wide and 4 high.
        For ``size > 2`` the x tick labels are rotated and shrunk.
    """
    _, ax = plt.subplots(1, 1, figsize=(4 * size, 4))
    total = float(len(df))
    top_values = df[feature].value_counts().index[:20]
    # Pass the column by keyword: a positional first argument is deprecated in
    # seaborn 0.11 and interpreted as `data` from 0.12 on. `ax=ax` ties the
    # plot to the axes created above instead of relying on the current axes.
    g = sns.countplot(x=feature, data=df, order=top_values,
                      palette=('#42b3f5', '#42f554', '#f55442'), ax=ax)
    g.set_title("Number and percentage of {}".format(title))
    if size > 2:
        plt.xticks(rotation=90, size=8)
    for p in ax.patches:
        height = p.get_height()
        # Place the percentage label centred, slightly above the bar.
        ax.text(p.get_x() + p.get_width() / 2.,
                height + 3,
                '{:1.2f}%'.format(100 * height / total),
                ha="center")
    plt.show()

In [None]:
# Count plot of the 'BCG Atlas: BCG Strain' column.
plot_count(feature="BCG Atlas: BCG Strain", title="BCG Strain", df=df, size=4)

In [None]:
# Count plot of the year in which vaccination was introduced.
plot_count(
    feature="BCG Atlas: Which year was vaccination introduced?",
    title="Year in which the vaccination was introduced",
    df=df,
    size=4,
)

In [None]:
# Count plot of the 'BCG Atlas: Timing of 1st BCG?' column.
plot_count(feature="BCG Atlas: Timing of 1st BCG?", title="Timing of 1st BCG", df=df, size=4)

In [None]:
# Count plot of the 'vaccination_timing_unified' column.
plot_count(feature="vaccination_timing_unified", title="Vaccination Timing Unified", df=df, size=3)

In [None]:
# Count plot of the 'Year of changes to BCG schedule' column.
plot_count(
    feature="Year of changes to BCG schedule",
    title="Year of changes to BCG schedule",
    df=df,
    size=4,
)

In [None]:
# Count plot of the 'BCG Atlas: BCG Recommendation Type' column.
plot_count(
    feature="BCG Atlas: BCG Recommendation Type",
    title="BCG Recommendation Type",
    df=df,
    size=4,
)

In [None]:
# Count plot of the 'BCG Atlas: Details of changes' column.
plot_count(feature="BCG Atlas: Details of changes", title="Details of changes", df=df, size=4)

In [None]:
# Count plot of how long the BCG vaccine strain has been used.
plot_count(
    feature="BCG Atlas: How long has this BCG vaccine strain been used?",
    title="How long has this BCG vaccine strain been used? ",
    df=df,
    size=4,
)

In [None]:
# Count plot of the 'mandatory_bcg_strain_2015-2020' column.
plot_count(
    feature="mandatory_bcg_strain_2015-2020",
    title="Mandatory BCG Strain 2015-2020",
    df=df,
    size=4,
)

In [None]:
# Count plot of the 'mandatory_bcg_strain_1960-1970' column.
plot_count(
    feature="mandatory_bcg_strain_1960-1970",
    title="Mandatory BCG Strain 1960-1970",
    df=df,
    size=4,
)

In [None]:
#Code by Olga Belitskaya https://www.kaggle.com/olgabelitskaya/parts-of-speech

# Horizontal count plot of the BCG strain column, drawn as transparent bars
# with thick outlines coloured from the "RdGy" palette.
plt.figure(figsize=(20, 15))
outline_colors = sns.color_palette("RdGy")
transparent_fill = (0, 0, 0, 0)
sns.countplot(
    data=df,
    y="BCG Atlas: BCG Strain",
    linewidth=5,
    facecolor=transparent_fill,
    edgecolor=outline_colors,
)
plt.title('BCG Strain 1Nov2020', fontsize=15);

In [None]:
#Code by Olga Belitskaya https://www.kaggle.com/olgabelitskaya/parts-of-speech

# Horizontal count plot of the timing-of-first-BCG column, drawn as
# transparent bars with thick outlines coloured from the "RdGy" palette.
plt.figure(figsize=(20, 15))
outline_colors = sns.color_palette("RdGy")
transparent_fill = (0, 0, 0, 0)
sns.countplot(
    data=df,
    y="BCG Atlas: Timing of 1st BCG?",
    linewidth=5,
    facecolor=transparent_fill,
    edgecolor=outline_colors,
)
plt.title('Timing of 1st BCG', fontsize=15);

In [None]:
#Code by Taha07  https://www.kaggle.com/taha07/data-scientists-jobs-analysis-visualization/notebook

# Pie chart of the ten most frequent BCG strain values, coloured with
# samples taken from the RdBu colormap.
color = plt.cm.RdBu(np.linspace(0, 1, 20))
strain_counts = df["BCG Atlas: BCG Strain"].value_counts()
top_strains = strain_counts.sort_values(ascending=False).head(10)
top_strains.plot.pie(y="country_name", colors=color, autopct="%0.1f%%")
plt.title("")
plt.axis("off")
plt.show()

In [None]:
import plotly.offline as pyo
import plotly.graph_objs as go
# Share (in percent) of rows for each value of the timing-of-first-BCG column,
# relative to the number of non-null country_name entries.
rows_per_timing = df.groupby('BCG Atlas: Timing of 1st BCG?').size()
lowerdf = rows_per_timing / df['country_name'].count() * 100
labels, values = lowerdf.index, lowerdf.values

# `hole` turns the pie into a donut-like chart.
donut = go.Pie(
    labels=labels,
    values=values,
    marker_colors=px.colors.sequential.speed,
    hole=.6,
)
fig = go.Figure(data=[donut])
fig.show()

In [None]:
#Code from Nitin Datta https://www.kaggle.com/nitindatta/eda-with-r3-id/comments

# Apply the style first so that both axes created below pick it up.
sns.set_style('dark')
# Create the 1x2 grid in one call. Calling plt.subplots() and then
# plt.subplot(1, 2, k) stacked new axes on top of the first one.
fig, ax = plt.subplots(1, 2, figsize=(18, 6))

# Left panel: recommendation type over all rows, split by unified vaccination
# timing. Columns are passed by keyword (x=/hue=/data=) because a positional
# column is deprecated in seaborn 0.11 and read as `data` from 0.12 on.
g1 = sns.countplot(x='BCG Atlas: BCG Recommendation Type',
                   hue='vaccination_timing_unified',
                   data=df, palette='rocket', ax=ax[0],
                   hatch='/', linewidth=3)
g1.set_title("BCG Recommendation Type")
ax[0].tick_params(axis='x', labelrotation=45)

# Right panel: timing of the first BCG dose for the Azerbaijan subset only.
g2 = sns.countplot(x='BCG Atlas: Timing of 1st BCG?',
                   hue='vaccination_timing_unified',
                   data=aze, palette='rocket', ax=ax[1],
                   hatch='/', linewidth=3)
g2.set_title("Azerbaijan Timing of 1st BCG")

In [None]:
# Count Plot
# Note: plt.style.use changes the matplotlib style for all later plots too.
plt.style.use("classic")
plt.figure(figsize=(8, 6))
# Pass the column by keyword: a positional first argument is deprecated in
# seaborn 0.11 and interpreted as `data` from 0.12 on.
sns.countplot(x='vaccination_timing_unified', data=df, palette='rainbow',
              hatch='/', linewidth=3)
plt.xlabel("vaccination_timing_unified")
plt.ylabel("Count")
plt.title("Vaccination Timing Unified")
plt.xticks(rotation=45, fontsize=8)
plt.show()

In [None]:
# IPython shell command (not Python): list the files in the AZ search-result folder.
ls ../input/hackathon/task_1-google_search_txt_files_v2/AZ/

In [None]:
# Path of one text file from the AZ search-result folder.
Azerbaijan = '../input/hackathon/task_1-google_search_txt_files_v2/AZ/Azerbaijan-en-result-109-original.txt'

In [None]:
# Read the whole file as UTF-8, silently dropping undecodable bytes.
# The `with` block closes the file handle, which was previously left open.
with open(Azerbaijan, 'r', encoding='utf-8', errors='ignore') as source_file:
    text = source_file.read()

In [None]:
# Preview the first 2500 characters of the loaded text.
print(text[:2500])

In [None]:
#Code by Olga Belitskaya https://www.kaggle.com/olgabelitskaya/sequential-data/comments
from IPython.display import display,HTML
# Default colours, Google font names and font sizes (in px) for dhtml.
c1, c2 = '#eb3434', '#eb3446'
f1, f2 = 'Akronim', 'Smokum'
fs1, fs2 = 30, 15


def dhtml(string, fontcolor=c1, font=f1, fontsize=fs1):
    """Display ``string`` in the notebook as a styled ``<h1>`` heading.

    The emitted HTML imports ``font`` from Google Fonts with the
    ``3d-float`` effect and applies ``fontcolor`` and ``fontsize`` (px)
    to the heading. ``string`` is inserted as-is, without HTML escaping.
    """
    pieces = [
        "<style>\n    @import 'https://fonts.googleapis.com/css?family=",
        font,
        "&effect=3d-float';</style>\n"
        "    <h1 class='font-effect-3d-float' style='font-family:",
        font,
        "; color:",
        fontcolor,
        "; font-size:",
        str(fontsize),
        "px;'>%s</h1>" % string,
    ]
    display(HTML("".join(pieces)))
    
    
# Closing banner. The dash was mis-encoded ("â€“") and is restored to an en dash.
dhtml('Thanks for your patience – please keep coming back to see the improvements, @mpwolke Was Here.')