SUPERCOMPUTERS (Faster, More Powerful and Technologically Superior)

A supercomputer is a computer with a high level of performance as compared to a general-purpose computer.

The performance of a supercomputer is commonly measured in floating-point operations per second (FLOPS) instead of million instructions per second (MIPS).

Since 2017, there have been supercomputers that can perform over $10^{17}$ FLOPS (a hundred quadrillion FLOPS, 100 petaFLOPS or 100 PFLOPS).

Since November 2017, all of the world's fastest 500 supercomputers run Linux-based operating systems. Additional research is being conducted in the United States, the European Union, Taiwan, Japan, and China to build faster, more powerful and technologically superior exascale supercomputers.
https://en.wikipedia.org/wiki/Supercomputer

![Supermicro GPU servers](https://mma.prnewswire.com/media/659152/Super_Micro_Computer_GPU_Servers.jpg?p=publish&w=950)

Image source: prnewswire.com

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import math
from textwrap import wrap

# Silence every Python warning for the rest of the session.
warnings.filterwarnings(action='ignore')

# Seaborn look and feel: dark grid background with the 'Set2' colour palette.
sns.set_style(style='darkgrid')
sns.set_palette(palette='Set2')

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found below the Kaggle input directory.
for folder, _subfolders, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        print(os.path.join(folder, file_name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the GREEN500 list; the file is read with the ISO-8859-2 encoding.
GREEN500_CSV = '../input/top-500-supercomputers/GREEN500.csv'
df = pd.read_csv(GREEN500_CSV, encoding='ISO-8859-2')

# Preview the first rows.
df.head()

In [None]:
# Number of missing values in each column.
df.isna().sum()

#Code by Mohammad Imran Shaikh https://www.kaggle.com/shikhnu/covid19-tweets-eda-visualization-wordcloud

In [None]:
# Count the distinct values in every column and collect them in a small frame.
unique = [df[column].nunique() for column in df.columns]
unique_df = pd.DataFrame({'Features': df.columns, 'Uniques': unique})

f, ax = plt.subplots(1, 1, figsize=(15, 7))

splot = sns.barplot(x='Features', y='Uniques', data=unique_df, alpha=0.8)

# Write each bar's height just above the bar, horizontally centred on it.
for bar in splot.patches:
    bar_height = bar.get_height()
    bar_centre = bar.get_x() + bar.get_width() / 2.
    splot.annotate(
        format(bar_height, '.0f'),
        (bar_centre, bar_height),
        ha='center',
        va='center',
        xytext=(0, 9),
        textcoords='offset points',
    )

plt.title('Bar plot for number of unique values in each column', weight='bold', size=15)
plt.xlabel('Features', size=12, weight='bold')
plt.ylabel('#Unique values', size=12, weight='bold')
plt.xticks(rotation=90)
plt.show()

In [None]:
import plotly.express    as px 

# Treemap of the first 20 rows: one tile per 'Name', tile area taken from
# 'TOP500 Rank' and tile colour from 'Rank'.
first_20_rows = df.head(20)
fig = px.treemap(
    first_20_rows,
    path=['Name'],
    values='TOP500 Rank',
    color='Rank',
    title="<b>TreeMap for Top 20 Names by TOP500 Rank</b>",
)
fig.show()

In [None]:
#word cloud
from wordcloud import WordCloud, ImageColorGenerator
# Concatenate every 'Computer' entry into one corpus; str() keeps missing
# (NaN) entries from breaking the join.
text = " ".join(str(each) for each in df.Computer)

# Create and generate a word cloud image:
wordcloud = WordCloud(max_words=200, colormap='Set2', background_color="grey").generate(text)

# Use a single figure: the original opened an extra empty figure before the
# plot and re-activated it afterwards, which produced a blank output.
plt.figure(figsize=(15, 10))

# Display the generated image:
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()

#Codes by YiYuan https://www.kaggle.com/latong/food-review-text-summarization/notebook

In [None]:
# Character length of every 'Computer' entry (NaN stays NaN).
# Named explicitly instead of `sum`, which would shadow the builtin sum()
# for every cell that runs afterwards.
computer_name_lengths = df['Computer'].str.len()
print(computer_name_lengths)

In [None]:
# Store the character length of each 'Computer' entry and summarise it.
# .str.len() is used instead of .apply(len) so that missing values yield NaN
# rather than raising TypeError (len() is undefined for float NaN).
df['computer length'] = df['Computer'].str.len()
df['computer length'].describe()

In [None]:
# Box plot of the 'computer length' column grouped by 'Year'.
sns.boxplot(data=df, x='Year', y='computer length')

#Codes by Ayush Garg https://www.kaggle.com/ayushggarg/covid-19-health-services-disruption-india-eda/notebook

Columns with missing values

In [None]:
# Do not truncate cell contents when displaying frames. None is the supported
# "no limit" value; -1 was deprecated in pandas 1.0 and is rejected by newer
# pandas releases.
pd.set_option('display.max_colwidth', None)
# Per-column count of missing values and the same figure as a percentage of rows.
na_counts = df.isna().sum()
na_percent = na_counts / df.shape[0] * 100
na_percent_text = ["{:.2f}".format(pct) + '%' for pct in na_percent.tolist()]

# Build a two-row frame (count, percent) and flip it so each column of `df`
# becomes a row.
df_NA = pd.DataFrame(
    data=[na_counts.tolist(), na_percent_text],
    columns=df.columns,
    index=['NA_Count', 'NA_Percent'],
).transpose()

# Show only the columns that actually have missing values, one per output column.
df_NA[df_NA['NA_Count'] > 0].transpose()

Drop the columns in which 50% or more of the values are missing

In [None]:
# Boolean mask per column: True when fewer than 50% of its values are missing.
missing_percent = df.isna().sum() / df.shape[0] * 100
s = missing_percent < 50

# Keep only the columns flagged True in the mask.
kept_columns = [column for column, keep in s.items() if keep]
df_modified = df[kept_columns]

print (df_modified.shape)
df_modified.head()

In [None]:
plt.rcParams['font.size'] = 14


def _wrap_pie_label(label, max_words=6):
    """Return `label` as text, broken onto two lines when it has more than `max_words` words."""
    words = str(label).split(' ')
    if len(words) <= max_words:
        return ' '.join(words)
    # Break after the first half of the words (rounded up).
    middle = math.ceil(len(words) / 2)
    return ' '.join(words[:middle]) + '\n' + ' '.join(words[middle:])


requested_columns = ['Continent', 'Interconnect Family', 'Interconnect', 'Country',
                     'Segment', 'System Model', 'Processor Generation']

# Only chart columns that exist in df_modified, and say which ones were
# skipped instead of failing with a KeyError.
pie_columns = [name for name in requested_columns if name in df_modified.columns]
skipped_columns = [name for name in requested_columns if name not in df_modified.columns]
if skipped_columns:
    print('Skipping columns not present in df_modified:', skipped_columns)

# Size the grid from the number of charts. The original fixed 3x2 grid had
# only six axes for seven columns, so zip() silently dropped the last chart.
N_GRID_COLS = 2
n_grid_rows = max(1, math.ceil(len(pie_columns) / N_GRID_COLS))
fig, axes = plt.subplots(n_grid_rows, N_GRID_COLS,
                         figsize=(20, 20 * n_grid_rows / 3), squeeze=False)

for col, ax in zip(pie_columns, axes.flat):
    # Ten most frequent values, minus the 'Not Available' placeholder if present.
    counts = (df_modified[col].value_counts().head(10)
              .drop('Not Available', errors='ignore'))
    labels = [_wrap_pie_label(value) for value in counts.index]
    ax.pie(x=counts.values, labels=labels, shadow=True, startangle=0)
    ax.set_title(col.replace('_', ' ').upper(), weight='bold', fontsize=18)

# Hide any grid cell that did not receive a chart.
for spare_ax in axes.ravel()[len(pie_columns):]:
    spare_ax.set_visible(False)

plt.tight_layout()
plt.show()

In [None]:
# One figure only: the original opened an 18x10 figure and then a second
# 10x6 one, so the first was left empty. The chart keeps the 10x6 size it
# was actually drawn at.
figure, ax = plt.subplots(figsize=(10, 6))
sns.countplot(y='Interconnect Family', hue='Manufacturer',
              data=df_modified.replace('Not Available', np.nan), ax=ax)

# Take the legend entries from the plotted hue levels so every label matches
# its colour. The original passed the raw 'Manufacturer' column (row order,
# with duplicates), which mislabelled the legend.
handles, hue_labels = ax.get_legend_handles_labels()
labels = ['\n'.join(wrap(str(l), 40)) for l in hue_labels]
ax.legend(handles, labels, shadow=True, facecolor='lightyellow',
          title='Computers Manufacturer', title_fontsize=18)

ax.set_xlabel('Count', weight='bold', fontsize=18)
ax.set_ylabel('Interconnect Family', weight='bold', fontsize=18)
plt.show()

In [None]:
# Count of systems per 'Segment', split by 'Site'; the 'Not Available'
# placeholder is turned into NaN before plotting.
site_fig, site_ax = plt.subplots(figsize=(18, 8))
segment_data = df_modified.replace('Not Available', np.nan)
sns.countplot(data=segment_data, y='Segment', hue='Site', ax=site_ax)

site_ax.legend(shadow=True, facecolor='lightyellow', title='Computers Site', title_fontsize=18)
site_ax.set_xlabel('Count', weight='bold', fontsize=18)
site_ax.set_ylabel('Segment', weight='bold', fontsize=18)
plt.show()

In [None]:
#Code by Siti K https://www.kaggle.com/khotijahs1/2020-indonesia-university-rank/comments

# Top 20 computers by TOP500 rank, with 'Rank' plotted per manufacturer.
# A lower rank number is a better placement, so sort ascending; the original
# descending sort selected the 20 largest rank numbers instead.
top_computer = df.sort_values(by='TOP500 Rank', ascending=True).head(20)
figure = plt.figure(figsize=(10,6))
sns.barplot(y=top_computer.Rank, x=top_computer.Manufacturer)
plt.xticks(rotation=45)
plt.xlabel('Manufacturer Name')
# The y axis shows the 'Rank' column, not a count.
plt.ylabel('Rank')
plt.title('Manufacturers Ranking')
plt.show()

In [None]:
#Code by Siti K https://www.kaggle.com/khotijahs1/2020-indonesia-university-rank/comments

# Top 20 computers by TOP500 rank, with 'Rank' plotted per processor.
# A lower rank number is a better placement, so sort ascending; the original
# descending sort selected the 20 largest rank numbers instead.
top_computer = df.sort_values(by='TOP500 Rank', ascending=True).head(20)
figure = plt.figure(figsize=(10,6))
sns.barplot(y=top_computer.Rank, x=top_computer.Processor)
plt.xticks(rotation=45)
plt.xlabel('Processor')
# The y axis shows the 'Rank' column, not a count.
plt.ylabel('Rank')
plt.title('Processors Ranking')
plt.show()

In [None]:
#Code by Siti K https://www.kaggle.com/khotijahs1/2020-indonesia-university-rank/comments

# Top 20 computers by TOP500 rank, with 'Rank' plotted per computer.
# The original sorted by 'Name' in descending order, which is alphabetical
# and unrelated to ranking; sort by rank (ascending, lower is better) so this
# cell agrees with the manufacturer and processor charts.
# NOTE(review): confirm 'TOP500 Rank' is the intended sort key here.
top_computer = df.sort_values(by='TOP500 Rank', ascending=True).head(20)
figure = plt.figure(figsize=(10,6))
sns.barplot(y=top_computer.Rank, x=top_computer.Computer)
plt.xticks(rotation=45)
plt.xlabel('Computer')
# The y axis shows the 'Rank' column, not a count.
plt.ylabel('Rank')
plt.title('Computers Ranking')
plt.show()

In [None]:
#Code by Olga Belitskaya https://www.kaggle.com/olgabelitskaya/sequential-data/comments
from IPython.display import display,HTML
# Heading colours (hex), font family names and font sizes (px) for dhtml().
c1 = '#eb3434'
c2 = '#eb3446'
f1 = 'Akronim'
f2 = 'Smokum'
fs1 = 30
fs2 = 15
def dhtml(string,fontcolor=c1,font=f1,fontsize=fs1):
    """Display `string` as a styled <h1> heading in the notebook output.

    The emitted HTML holds a <style> block that imports `font` from
    fonts.googleapis.com with the ``3d-float`` effect, followed by an <h1>
    using that font, the colour `fontcolor` and a size of `fontsize` px.
    """
    font_import = (
        "<style>\n"
        "    @import 'https://fonts.googleapis.com/css?family="
        + font
        + "&effect=3d-float';</style>\n"
    )
    heading_open = (
        "    <h1 class='font-effect-3d-float' style='font-family:"
        + font
        + "; color:" + fontcolor
        + "; font-size:" + str(fontsize)
    )
    # %-formatting is applied to the closing fragment only, as before.
    heading_rest = "px;'>%s</h1>" % string
    display(HTML(font_import + heading_open + heading_rest))
    
    
# Sign-off banner using dhtml's default colour, font and size.
# The author's name is restored from mojibake ('Mar√≠lia' -> 'Marília').
dhtml('Be patient. Marília Prata, @mpwolke was Here without a SuperComputer')